In [1]:
from pathlib import Path
import numpy as np
import pysam
from tqdm import tqdm
from itertools import repeat
import polars as pl
import matplotlib.pyplot as plt
import pandas as pd
import plotly.graph_objects as go
import plotly
import plotly.io as pio
import argparse
from random import seed,uniform


def _count_pseU_and_C_per_position(bam_path, reference_sequence, max_reads=None, min_probability=0.95):
    """Count coverage, confident pseU calls and C-over-T basecalls per reference position.

    Private helper of ``view_modifications``; it is used for both the sample
    BAM and the IVT BAM so the two are always counted the same way.

    Parameters
    ----------
    bam_path : str
        BAM file to scan. A leading ``~`` is expanded before the path is
        handed to pysam.
    reference_sequence : str
        Sequence the reads were aligned to. Its length fixes the length of the
        returned arrays, and its bases are compared against the read bases.
    max_reads : int or None
        Stop once this many mapped, non-supplementary reads were counted.
        ``None`` scans the whole file.
    min_probability : float
        A pseU call is counted when ``(quality + 1) / 256`` reaches this value.

    Returns
    -------
    tuple of numpy.ndarray
        ``(coverage, pseU_calls, c_over_t)``, three integer arrays indexed by
        0-based reference position.
    """
    import os

    n_positions = len(reference_sequence)
    coverage = np.zeros(n_positions, dtype=int)
    pseU_calls = np.zeros(n_positions, dtype=int)
    c_over_t = np.zeros(n_positions, dtype=int)
    n_counted = 0

    with pysam.AlignmentFile(os.path.expanduser(bam_path), mode="rb") as bam:
        for read in tqdm(bam.fetch(until_eof=True)):
            if max_reads is not None and n_counted >= max_reads:
                break
            if read.is_supplementary:
                continue
            ref_start, ref_end = read.reference_start, read.reference_end
            # until_eof=True also yields unmapped reads; they have no
            # reference span and would otherwise break the coverage update.
            if read.is_unmapped or ref_end is None:
                continue
            n_counted += 1
            coverage[ref_start:ref_end] += 1

            called_mods = read.modified_bases
            if called_mods is None:
                continue
            # Modification key this notebook treats as pseudouridine.
            pseU_sites = called_mods.get(("T", 0, 17802))
            try:
                # Positions from get_aligned_pairs() and modified_bases index
                # the read in alignment orientation, i.e. query_sequence (the
                # forward sequence differs for reverse-strand reads).
                query_sequence = read.query_sequence
                query_to_ref = {
                    pair[0]: pair[1]
                    for pair in read.get_aligned_pairs(with_seq=True)
                    if None not in pair
                }

                if pseU_sites is not None:
                    for query_pos, quality in pseU_sites:
                        probability = (quality + 1) / 256
                        if probability >= min_probability and query_pos in query_to_ref:
                            pseU_calls[query_to_ref[query_pos]] += 1

                for query_pos, ref_pos in query_to_ref.items():
                    if query_sequence[query_pos] == "C" and reference_sequence[ref_pos] == "T":
                        c_over_t[ref_pos] += 1
            except TypeError:
                # Best effort: a read that cannot be processed is reported
                # and skipped instead of aborting the whole scan.
                print("A type error occured")

    return coverage, pseU_calls, c_over_t


def _modification_quantification_table(coverage, pseU_calls, c_over_t):
    """Build the per-position quantification table from the raw count arrays.

    Private helper of ``view_modifications``. Zero coverage is replaced by 1
    so the relative columns are 0 instead of NaN at uncovered positions; the
    ``reads_aligning`` column carries that clamped value. m6A counting is not
    performed, so ``n_m6a`` and ``rel_n_m6a`` are all zero.
    """
    safe_coverage = coverage.copy()
    safe_coverage[safe_coverage == 0] = 1
    m6a_calls = np.zeros_like(pseU_calls)

    return pd.DataFrame({
        "position": np.arange(1, len(safe_coverage) + 1),
        "reads_aligning": safe_coverage,
        "n_pseU": pseU_calls,
        "n_m6a": m6a_calls,
        "n_C": c_over_t,
        "rel_n_pseU": pseU_calls / safe_coverage,
        "rel_n_m6a": m6a_calls / safe_coverage,
        "rel_n_C": c_over_t / safe_coverage,
    })


def view_modifications(bamfile_path:str, IVT_path:str, reference_path:str, literature_mod_df_path:str, condition:str, max_ivt_reads:int=20000, min_probability:float=0.95):
    """Quantify pseU calls and C-over-T basecalls for a sample and an IVT control, then plot them.

    For both BAM files the per-position coverage, the number of confident pseU
    calls and the number of reads that show a ``C`` where the reference has a
    ``T`` are counted. The sample is drawn above zero and the IVT control
    mirrored below zero; literature ``psu`` and ``Um`` sites are marked with
    dashed vertical lines.

    Parameters
    ----------
    bamfile_path : str
        BAM file of the sample.
    IVT_path : str
        BAM file of the IVT control.
    reference_path : str
        FASTA file; only its first record is used.
    literature_mod_df_path : str
        Tab-separated, header-less file with seven columns. The third column
        is used as the site position and the fourth as the modification name.
    condition : str
        Prefix of the sample CSV and of every SVG file that is written.
    max_ivt_reads : int
        Maximum number of IVT reads that are counted.
    min_probability : float
        Minimum probability for a pseU call to be counted.

    Side effects
    ------------
    Writes ``{condition}_modification_quantification.csv``,
    ``IVT_modification_quantification.csv`` (the same name on every call) and
    four SVG files into the working directory, and shows the figure four times.
    """
    import os

    literature_mod_df = pd.read_csv(literature_mod_df_path, sep="\t", header=None, index_col=None)
    literature_mod_df.columns = ["reference","start","end","modification","A","B","C"]
    psu_mod_df = literature_mod_df.loc[literature_mod_df["modification"] == "psu"]
    Um_mod_df = literature_mod_df.loc[literature_mod_df["modification"] == "Um"]

    with pysam.FastaFile(os.path.expanduser(reference_path)) as fasta_file:
        reference_sequence = str(fasta_file.fetch(fasta_file.references[0]))

    sample_table = _modification_quantification_table(
        *_count_pseU_and_C_per_position(
            bamfile_path, reference_sequence, min_probability=min_probability
        )
    )
    sample_table.to_csv(f"{condition}_modification_quantification.csv", sep=";", header=True, index=False)

    ivt_table = _modification_quantification_table(
        *_count_pseU_and_C_per_position(
            IVT_path, reference_sequence, max_reads=max_ivt_reads, min_probability=min_probability
        )
    )
    ivt_table.to_csv("IVT_modification_quantification.csv", sep=";", header=True, index=False)

    sample_pseU = sample_table["rel_n_pseU"].to_numpy()
    sample_C = sample_table["rel_n_C"].to_numpy()
    ivt_pseU = ivt_table["rel_n_pseU"].to_numpy()
    ivt_C = ivt_table["rel_n_C"].to_numpy()

    fig = go.Figure(layout=go.Layout(height = 800))

    # The x axis is the 0-based reference index. IVT values are negated so the
    # control is mirrored below the sample.
    x_index = np.arange(len(reference_sequence))
    frequency_traces = (
        (sample_pseU + sample_C, "rgba(153,0,0,0.4)", "C/U + pseU freq."),
        (sample_pseU, "rgba(153,0,0,1)", "pseU freq."),
        (-ivt_pseU - ivt_C, "rgba(0, 181, 204, 1)", "IVT 18S C/U + pseU freq."),
        (-ivt_pseU, "rgba(30, 81, 123, 1)", "IVT 18S pseU freq."),
    )
    for y_values, line_color, trace_name in frequency_traces:
        fig.add_trace(
            go.Scatter(
                x=x_index,
                y=y_values,
                line_color=line_color,
                showlegend=True,
                name=trace_name
            )
        )

    # White zero line separating the sample half from the IVT half.
    fig.add_trace(
        go.Scatter(
            x=x_index,
            y=np.zeros(len(x_index)),
            name="",
            showlegend=False,
            line_color="white"
        )
    )

    # Literature sites: thin dashed markers plus a dummy trace per kind so the
    # markers get a legend entry in the same colour as the lines drawn.
    known_sites = (
        (psu_mod_df, "rgba(90,34,139,0.5)", "rgba(90,34,139,0.7)", "known pseU"),
        (Um_mod_df, "rgba(4,59,92,0.5)", "rgba(4,59,92,0.7)", "known Um"),
    )
    for site_df, marker_color, legend_color, legend_name in known_sites:
        for site_end in site_df["end"]:
            fig.add_shape(
                x0=site_end-1,
                x1=site_end-1,
                y0=0,
                y1=1,
                line=dict(color=marker_color, width=0.3, dash="dash")
            )
        fig.add_trace(go.Scatter(
            x=[None], y=[None],  # invisible point, only there for the legend
            mode='lines',
            line=dict(color=legend_color, width=1, dash="dash"),
            showlegend=True,
            name=legend_name
        ))

    fig.update_layout(
        title="Modification basecalling",
        xaxis=dict(title="Position on reference",gridcolor = "white",tickformat="d"),
        yaxis=dict(title="Modification frequency",gridcolor = "white"),
        plot_bgcolor='rgba(0,0,0,0)'
    )

    fig.write_image(f"{condition}_general_modification_ratio_with_Um.svg",format="svg")
    fig.show()

    # Zoomed views; the x ranges are reference coordinates hard-coded for the
    # reference used in this notebook.
    zoom_regions = (
        ("5-8S", [6550,6800]),
        ("18S", [3655,5620]),
        ("28S", [7910,13000]),
    )
    for region_name, x_range in zoom_regions:
        fig.update_layout(xaxis=dict(range=x_range))
        fig.write_image(f"{condition}_{region_name}_modification_ratio_with_Um.svg",format="svg")
        fig.show()
    

    

In [2]:
from pathlib import Path
import numpy as np
import pysam
from tqdm import tqdm
from itertools import repeat
import polars as pl
import matplotlib.pyplot as plt
import pandas as pd
import plotly.graph_objects as go
import plotly
import plotly.io as pio
import argparse
from random import seed,uniform
def view_modification_per_fragment(bamfile_path:str, fragment_df_path:str,fragment_to_analyse: str,color:str,markerstyle:str, reference_path:str, literature_mod_df_path:str, condition:str, fig, ax, modification_csv_path=""):
    """Scatter the modification frequency at literature pseU sites for the reads of one fragment.

    Only reads whose name is listed in the ``IDS`` cell of the selected
    fragment are counted. For every literature ``psu`` site below position
    6500 the largest ``rel_n_C_and_PseU`` value in a small window around the
    site is plotted on ``ax``.

    Parameters
    ----------
    bamfile_path : str
        BAM file holding the reads. Only read when ``modification_csv_path``
        is empty.
    fragment_df_path : str
        ``;``-separated fragment table with the columns ``Reference``,
        ``Start``, ``End``, ``Fragment``, ``Length``, ``IDS``, ``n_Reads`` and
        ``rel_n_Reads``.
    fragment_to_analyse : str
        Value of the ``Fragment`` column to select. An ``IndexError`` is raised
        when no row matches.
    color, markerstyle : str
        Passed to ``ax.scatter`` as ``c`` and ``marker``.
    reference_path : str
        FASTA file; only its first record is used, and only when counting from
        the BAM file.
    literature_mod_df_path : str
        Tab-separated, header-less file with seven columns (see column names
        assigned below).
    condition : str
        Legend label (underscores shown as spaces) and prefix of the CSV that
        is written. The value ``"IVT_18S"`` additionally limits counting to
        20000 reads.
    fig
        Not used; kept so existing calls keep working.
    ax : matplotlib Axes
        Axes that receive the scatter points and the x tick labels.
    modification_csv_path : str
        If non-empty, a previously written quantification CSV is loaded
        instead of counting from the BAM file.
    """
    import ast
    import os

    fragment_df = pl.read_csv(os.path.expanduser(fragment_df_path), separator=";",has_header=True,columns=["Reference","Start","End","Fragment","Length","IDS","n_Reads","rel_n_Reads"])
    fragment_df = fragment_df.to_pandas()
    fragment_df = fragment_df[fragment_df["Fragment"] == fragment_to_analyse]
    # The IDS cell holds a Python literal (e.g. a list of read names).
    # literal_eval parses it without executing arbitrary code from the file.
    ids_of_interest = set(ast.literal_eval(fragment_df["IDS"].to_list()[0]))

    literature_mod_df = pd.read_csv(literature_mod_df_path ,sep="\t",header=None,index_col=None)
    literature_mod_df.columns = ["reference","start","end","modification","A","B","C"]

    psu_mod_df = literature_mod_df.loc[literature_mod_df["modification"] == "psu"]
    # Only sites below 6500 get a tick on the x axis.
    psU_mod_df_positions = [int(end) for end in list(psu_mod_df["end"]) if int(end) < 6500]

    if modification_csv_path == "":
        with pysam.FastaFile(os.path.expanduser(reference_path)) as fasta_file:
            reference_sequence = str(fasta_file.fetch(fasta_file.references[0]))

        n_positions = len(reference_sequence)
        reads_aligning = np.zeros(n_positions, dtype=int)
        mod_positions_pseU = np.zeros(n_positions, dtype=int)
        number_of_basecalled_C = np.zeros(n_positions, dtype=int)
        counter = 0

        with pysam.AlignmentFile(os.path.expanduser(bamfile_path), mode="rb") as bamfile:
            for read in tqdm(bamfile.fetch(until_eof=True)):
                if read.is_supplementary:
                    continue
                if read.query_name not in ids_of_interest:
                    continue
                if condition == "IVT_18S" and counter >= 20000:
                    break
                start = read.reference_start
                end = read.reference_end
                # until_eof=True also yields unmapped reads; they have no
                # reference span and would otherwise break the coverage update.
                if read.is_unmapped or end is None:
                    continue
                counter += 1
                reads_aligning[start:end] += 1

                mod_obj = read.modified_bases
                if mod_obj is None:
                    continue
                # Modification key this notebook treats as pseudouridine.
                mod_pseU = mod_obj.get(('T', 0, 17802))
                try:
                    # Positions from get_aligned_pairs() and modified_bases
                    # index the read in alignment orientation, i.e.
                    # query_sequence.
                    query_sequence = read.query_sequence
                    query_to_ref = {
                        pair[0]: pair[1]
                        for pair in read.get_aligned_pairs(with_seq=True)
                        if None not in pair
                    }

                    if mod_pseU is not None:
                        for query_pos, quality in mod_pseU:
                            p = ((quality + 1)/256)
                            if p >= 0.95 and query_pos in query_to_ref:
                                mod_positions_pseU[query_to_ref[query_pos]] += 1

                    for query_pos, ref_pos in query_to_ref.items():
                        if query_sequence[query_pos] == "C" and reference_sequence[ref_pos] == "T":
                            number_of_basecalled_C[ref_pos] += 1
                except TypeError:
                    # Best effort: report the read and carry on.
                    print("A type error occured")

        # Avoid division by zero at uncovered positions; the clamped value is
        # also what ends up in the reads_aligning column.
        reads_aligning[reads_aligning == 0] = 1

        rel_mod_positions_pseU = mod_positions_pseU/reads_aligning
        rel_number_of_basecalled_C = number_of_basecalled_C/reads_aligning

        modification_df = pd.DataFrame({
            "position":np.arange(1, n_positions + 1),
            "reads_aligning":reads_aligning,
            "n_pseU":mod_positions_pseU,
            "n_C":number_of_basecalled_C,
            "n_C_and_pseU": number_of_basecalled_C + mod_positions_pseU,
            "rel_n_pseU":rel_mod_positions_pseU,
            "rel_n_C":rel_number_of_basecalled_C,
            "rel_n_C_and_PseU": rel_number_of_basecalled_C + rel_mod_positions_pseU
        })
        modification_df.to_csv(f"{condition}_modification_quantification.csv",sep=";",header=True,index=False)
    else:
        modification_df = pd.read_csv(modification_csv_path,sep=";",header=0)

    # Rows with a usable ratio; the same for every site, so filter only once.
    valid_rows = modification_df[modification_df["rel_n_C_and_PseU"] >= 0]

    # x tick index of every plotted site (first occurrence wins, like list.index).
    tick_index_by_position = {}
    for tick_index, site_position in enumerate(psU_mod_df_positions):
        tick_index_by_position.setdefault(site_position, tick_index)

    max_ratio_list = []
    max_position_list = []
    for start,end in zip(psu_mod_df["start"],psu_mod_df["end"]):
        # NOTE(review): the window is {start-1, end, end+1}. If the file is
        # BED-like with end == start + 1 this skips end-1 and looks at end-2
        # instead; confirm whether `start` was intended.
        in_window = valid_rows["position"].isin([start-1,end,end+1])
        window_ratios = valid_rows.loc[in_window, "rel_n_C_and_PseU"]
        if window_ratios.empty:
            continue
        tick_index = tick_index_by_position.get(end)
        if tick_index is None:
            # Site outside the plotted range: skip it, but keep going so that
            # later in-range sites are not lost when the file is unsorted.
            continue
        max_ratio_list.append(window_ratios.max())
        max_position_list.append(tick_index)

    legend_name = condition.replace("_"," ")

    ax.scatter(max_position_list,max_ratio_list,c=color,marker=markerstyle,label=legend_name,s=80,alpha=0.6)
    ax.set_xticks(range(len(psU_mod_df_positions)))
    ax.set_xticklabels(psU_mod_df_positions)
    
    

In [None]:
fig,ax = plt.subplots(nrows=1,ncols=1,figsize=(20,10))
plt.xticks(rotation = 90)

# Every panel differs only in the six values listed per row below, so the
# calls are driven from one table instead of being copied fourteen times.
DATA_ROOT = "~/directRNA_004"
REFERENCE_FASTA = "~/wf-nanoribolyzer/references/RNA45SN1.fasta"
LITERATURE_MODS_BED = "~/wf-nanoribolyzer/references/rRNA_modifications_conv.bed"

IVT_BAM = "filtered_pod5_basecalled.bam"
PSU_M6A_BAM = "filtered_pod5_rebasecalled_psU_m6A_aligned.bam"
REBASECALLED_BAM = "filtered_pod5_rebasecalled.bam"

# (run directory, BAM file name, fragment, colour, marker, condition label)
FRAGMENT_PANELS = [
    ("IVT_18S", IVT_BAM, "18S", "red", "*", "IVT_18S"),
    ("20231114_RNA004_NP_Nuc", PSU_M6A_BAM, "21S-C", "palevioletred", "o", "NP_Nucleus_21S-C"),
    ("20231114_RNA004_NP_Nuc", PSU_M6A_BAM, "21S", "indigo", "o", "NP_Nucleus_21S"),
    ("20231114_RNA004_NP_Nuc", PSU_M6A_BAM, "18S-E", "darkolivegreen", "o", "NP_Nucleus_18S-E"),
    ("20231114_RNA004_NP_Nuc", PSU_M6A_BAM, "18S", "darkred", "o", "NP_Nucleus_18S"),
    ("20231114_RNA004_NP_Cyt", PSU_M6A_BAM, "18S", "darkviolet", "o", "NP_Cytoplasm_18S"),
    ("20231114_RNA004_IVPA_Nuc", PSU_M6A_BAM, "21S-C", "palevioletred", "v", "IVPA_Nucleus_21S-C"),
    ("20231114_RNA004_IVPA_Nuc", PSU_M6A_BAM, "21S", "indigo", "v", "IVPA_Nucleus_21S"),
    ("20231114_RNA004_IVPA_Nuc", PSU_M6A_BAM, "18S-E", "darkolivegreen", "v", "IVPA_Nucleus_18S-E"),
    ("20231114_RNA004_IVPA_Nuc", PSU_M6A_BAM, "18S", "darkred", "v", "IVPA_Nucleus_18S"),
    ("20231114_RNA004_IVPA_Cyt", PSU_M6A_BAM, "18S", "darkviolet", "v", "IVPA_Cytoplasm_18S"),
    ("TSR_KO", REBASECALLED_BAM, "18S", "black", "s", "TSR_KO"),
    ("WBSCR_KO", REBASECALLED_BAM, "18S", "black", "8", "WBSCR_KO"),
    ("DIMT1L_KO", REBASECALLED_BAM, "18S", "black", "X", "DIMT1L_KO"),
]

for run_dir, bam_name, fragment, panel_color, panel_marker, panel_condition in FRAGMENT_PANELS:
    view_modification_per_fragment(
        bamfile_path=f"{DATA_ROOT}/{run_dir}/filtered_pod5/{bam_name}",
        fragment_df_path=f"{DATA_ROOT}/{run_dir}/template_based_analysis/template_fragment_df.csv",
        fragment_to_analyse=fragment,
        color=panel_color,
        markerstyle=panel_marker,
        reference_path=REFERENCE_FASTA,
        literature_mod_df_path=LITERATURE_MODS_BED,
        condition=panel_condition,
        fig=fig,
        ax=ax,
    )

ax.legend(loc='upper right')
ax.set_xlabel("PseU position on 45S")
ax.set_ylabel("Modification frequency")
ax.set_ylim(0,1.1)

# Light guide lines through every tick so points can be traced to their site.
for tick in ax.get_xticks():
    ax.axvline(tick, color='black', linestyle='--', linewidth=1, alpha=.5)

for tick in ax.get_yticks():
    ax.axhline(tick, color='gray', linestyle='--', linewidth=1, alpha=.2)


In [None]:
# Write `fig` to an SVG file in the current working directory.
# Relies on `fig` having been created by an earlier cell.
fig.savefig("scatterplot_relative_modification_ratios_18S.svg", format='svg', dpi=1200)

In [None]:
# Re-plot the IVPA cytoplasm 18S fragment onto the existing `fig`/`ax`.
# The function takes `markerstyle` (required) and has no `linestyle` parameter,
# so the marker used for this condition in the main scatter cell is passed.
view_modification_per_fragment(
    bamfile_path="~/directRNA_004/20231114_RNA004_IVPA_Cyt/filtered_pod5/filtered_pod5_rebasecalled_psU_m6A_aligned.bam",
    fragment_df_path="~/directRNA_004/20231114_RNA004_IVPA_Cyt/template_based_analysis/template_fragment_df.csv",
    fragment_to_analyse="18S",
    color="darkviolet",
    markerstyle="v",
    reference_path="~/wf-nanoribolyzer/references/RNA45SN1.fasta",
    literature_mod_df_path="~/wf-nanoribolyzer/references/rRNA_modifications_conv.bed",
    condition="IVPA_Cytoplasm_18S",
    fig=fig,
    ax=ax,
)

In [None]:
# Sample-vs-IVT modification plots and CSV export for condition "NP_Cytoplasm".
view_modifications(
    bamfile_path="~/directRNA_004/20231114_RNA004_NP_Cyt/filtered_pod5/filtered_pod5_rebasecalled_psU_m6A_aligned.bam",
    IVT_path="~/directRNA_004/IVT_18S/filtered_pod5/filtered_pod5_basecalled.bam",
    reference_path="~/wf-nanoribolyzer/references/RNA45SN1.fasta",
    literature_mod_df_path="~/wf-nanoribolyzer/references/rRNA_modifications_conv.bed",
    condition="NP_Cytoplasm",
)

In [None]:
# Sample-vs-IVT modification plots and CSV export for condition "NP_Nuc".
view_modifications(
    bamfile_path="~/directRNA_004/20231114_RNA004_NP_Nuc/filtered_pod5/filtered_pod5_rebasecalled_psU_m6A_aligned.bam",
    IVT_path="~/directRNA_004/IVT_18S/filtered_pod5/filtered_pod5_basecalled.bam",
    reference_path="~/wf-nanoribolyzer/references/RNA45SN1.fasta",
    literature_mod_df_path="~/wf-nanoribolyzer/references/rRNA_modifications_conv.bed",
    condition="NP_Nuc",
)

In [None]:
# view_modifications run labelled "IVPA_Nuc" (BAM from the 20231114_RNA004_IVPA_Nuc run directory).
view_modifications(
    bamfile_path="~/directRNA_004/20231114_RNA004_IVPA_Nuc/filtered_pod5/filtered_pod5_rebasecalled_psU_m6A_aligned.bam",
    IVT_path="~/directRNA_004/IVT_18S/filtered_pod5/filtered_pod5_basecalled.bam",
    reference_path="~/wf-nanoribolyzer/references/RNA45SN1.fasta",
    literature_mod_df_path="~/wf-nanoribolyzer/references/rRNA_modifications_conv.bed",
    condition="IVPA_Nuc",
)

In [None]:
# view_modifications run labelled "IVPA_Cyt" (BAM from the 20231114_RNA004_IVPA_Cyt run directory).
view_modifications(
    bamfile_path="~/directRNA_004/20231114_RNA004_IVPA_Cyt/filtered_pod5/filtered_pod5_rebasecalled_psU_m6A_aligned.bam",
    IVT_path="~/directRNA_004/IVT_18S/filtered_pod5/filtered_pod5_basecalled.bam",
    reference_path="~/wf-nanoribolyzer/references/RNA45SN1.fasta",
    literature_mod_df_path="~/wf-nanoribolyzer/references/rRNA_modifications_conv.bed",
    condition="IVPA_Cyt",
)

In [51]:
# view_modifications run labelled "SN3_R1" (BAM from the 20250218_SN3_1 run directory).
view_modifications(
    bamfile_path="~/directRNA_004/20250218_SN3_1/filtered_pod5/filtered_pod5_basecalled.bam",
    IVT_path="~/directRNA_004/IVT_18S/filtered_pod5/filtered_pod5_basecalled.bam",
    reference_path="~/wf-nanoribolyzer/references/RNA45SN1.fasta",
    literature_mod_df_path="~/wf-nanoribolyzer/references/rRNA_modifications_conv.bed",
    condition="SN3_R1",
)

In [53]:
# view_modifications run labelled "SN3_R2" (BAM from the 20250409_SN3_2 run directory).
view_modifications(
    bamfile_path="~/directRNA_004/20250409_SN3_2/filtered_pod5/filtered_pod5_basecalled.bam",
    IVT_path="~/directRNA_004/IVT_18S/filtered_pod5/filtered_pod5_basecalled.bam",
    reference_path="~/wf-nanoribolyzer/references/RNA45SN1.fasta",
    literature_mod_df_path="~/wf-nanoribolyzer/references/rRNA_modifications_conv.bed",
    condition="SN3_R2",
)

In [54]:
# view_modifications run labelled "SN3_R3" (BAM from the 20250218_SN3_3 run directory).
view_modifications(
    bamfile_path="~/directRNA_004/20250218_SN3_3/filtered_pod5/filtered_pod5_basecalled.bam",
    IVT_path="~/directRNA_004/IVT_18S/filtered_pod5/filtered_pod5_basecalled.bam",
    reference_path="~/wf-nanoribolyzer/references/RNA45SN1.fasta",
    literature_mod_df_path="~/wf-nanoribolyzer/references/rRNA_modifications_conv.bed",
    condition="SN3_R3",
)

In [55]:
# view_modifications run labelled "SN2_R1" (BAM from the 20250319_SN2_1 run directory).
view_modifications(
    bamfile_path="~/directRNA_004/20250319_SN2_1/filtered_pod5/filtered_pod5_basecalled.bam",
    IVT_path="~/directRNA_004/IVT_18S/filtered_pod5/filtered_pod5_basecalled.bam",
    reference_path="~/wf-nanoribolyzer/references/RNA45SN1.fasta",
    literature_mod_df_path="~/wf-nanoribolyzer/references/rRNA_modifications_conv.bed",
    condition="SN2_R1",
)

In [56]:
# view_modifications run labelled "SN2_R2" (BAM from the 20250409_SN2_2 run directory).
view_modifications(
    bamfile_path="~/directRNA_004/20250409_SN2_2/filtered_pod5/filtered_pod5_basecalled.bam",
    IVT_path="~/directRNA_004/IVT_18S/filtered_pod5/filtered_pod5_basecalled.bam",
    reference_path="~/wf-nanoribolyzer/references/RNA45SN1.fasta",
    literature_mod_df_path="~/wf-nanoribolyzer/references/rRNA_modifications_conv.bed",
    condition="SN2_R2",
)

In [57]:
# view_modifications run labelled "SN2_R3" (BAM from the 20250409_SN2_3 run directory).
view_modifications(
    bamfile_path="~/directRNA_004/20250409_SN2_3/filtered_pod5/filtered_pod5_basecalled.bam",
    IVT_path="~/directRNA_004/IVT_18S/filtered_pod5/filtered_pod5_basecalled.bam",
    reference_path="~/wf-nanoribolyzer/references/RNA45SN1.fasta",
    literature_mod_df_path="~/wf-nanoribolyzer/references/rRNA_modifications_conv.bed",
    condition="SN2_R3",
)

In [58]:
# view_modifications run labelled "SN1_R1" (BAM from the 20250218_SN1_1 run directory).
view_modifications(
    bamfile_path="~/directRNA_004/20250218_SN1_1/filtered_pod5/filtered_pod5_basecalled.bam",
    IVT_path="~/directRNA_004/IVT_18S/filtered_pod5/filtered_pod5_basecalled.bam",
    reference_path="~/wf-nanoribolyzer/references/RNA45SN1.fasta",
    literature_mod_df_path="~/wf-nanoribolyzer/references/rRNA_modifications_conv.bed",
    condition="SN1_R1",
)

In [59]:
# view_modifications run labelled "SN1_R2" (BAM from the 20250409_SN1_2 run directory).
view_modifications(
    bamfile_path="~/directRNA_004/20250409_SN1_2/filtered_pod5/filtered_pod5_basecalled.bam",
    IVT_path="~/directRNA_004/IVT_18S/filtered_pod5/filtered_pod5_basecalled.bam",
    reference_path="~/wf-nanoribolyzer/references/RNA45SN1.fasta",
    literature_mod_df_path="~/wf-nanoribolyzer/references/rRNA_modifications_conv.bed",
    condition="SN1_R2",
)

In [60]:
# view_modifications run labelled "SN1_R3" (BAM from the 20250218_SN1_3 run directory).
view_modifications(
    bamfile_path="~/directRNA_004/20250218_SN1_3/filtered_pod5/filtered_pod5_basecalled.bam",
    IVT_path="~/directRNA_004/IVT_18S/filtered_pod5/filtered_pod5_basecalled.bam",
    reference_path="~/wf-nanoribolyzer/references/RNA45SN1.fasta",
    literature_mod_df_path="~/wf-nanoribolyzer/references/rRNA_modifications_conv.bed",
    condition="SN1_R3",
)

In [3]:
# view_modifications run labelled "Cyt_R2" (BAM from the 20250416_Cyt1_dRNA run directory).
# NOTE(review): directory index (Cyt1) differs from the replicate label (R2) - confirm this offset is intended.
view_modifications(
    bamfile_path="~/directRNA_004/20250416_Cyt1_dRNA/filtered_pod5/filtered_pod5_basecalled.bam",
    IVT_path="~/directRNA_004/IVT_18S/filtered_pod5/filtered_pod5_basecalled.bam",
    reference_path="~/wf-nanoribolyzer/references/RNA45SN1.fasta",
    literature_mod_df_path="~/wf-nanoribolyzer/references/rRNA_modifications_conv.bed",
    condition="Cyt_R2",
)

In [4]:
# view_modifications run labelled "Cyt_R3" (BAM from the 20250416_Cyt2_dRNA run directory).
# NOTE(review): directory index (Cyt2) differs from the replicate label (R3) - confirm this offset is intended.
view_modifications(
    bamfile_path="~/directRNA_004/20250416_Cyt2_dRNA/filtered_pod5/filtered_pod5_basecalled.bam",
    IVT_path="~/directRNA_004/IVT_18S/filtered_pod5/filtered_pod5_basecalled.bam",
    reference_path="~/wf-nanoribolyzer/references/RNA45SN1.fasta",
    literature_mod_df_path="~/wf-nanoribolyzer/references/rRNA_modifications_conv.bed",
    condition="Cyt_R3",
)

In [5]:
# view_modifications run labelled "Nuc_R2" (BAM from the 20250416_Nuc1_dRNA run directory).
# NOTE(review): directory index (Nuc1) differs from the replicate label (R2) - confirm this offset is intended.
view_modifications(
    bamfile_path="~/directRNA_004/20250416_Nuc1_dRNA/filtered_pod5/filtered_pod5_basecalled.bam",
    IVT_path="~/directRNA_004/IVT_18S/filtered_pod5/filtered_pod5_basecalled.bam",
    reference_path="~/wf-nanoribolyzer/references/RNA45SN1.fasta",
    literature_mod_df_path="~/wf-nanoribolyzer/references/rRNA_modifications_conv.bed",
    condition="Nuc_R2",
)

In [6]:
# view_modifications run labelled "Nuc_R3" (BAM from the 20250416_Nuc2_dRNA run directory).
# NOTE(review): directory index (Nuc2) differs from the replicate label (R3) - confirm this offset is intended.
view_modifications(
    bamfile_path="~/directRNA_004/20250416_Nuc2_dRNA/filtered_pod5/filtered_pod5_basecalled.bam",
    IVT_path="~/directRNA_004/IVT_18S/filtered_pod5/filtered_pod5_basecalled.bam",
    reference_path="~/wf-nanoribolyzer/references/RNA45SN1.fasta",
    literature_mod_df_path="~/wf-nanoribolyzer/references/rRNA_modifications_conv.bed",
    condition="Nuc_R3",
)

In [10]:
# Single 20x10 inch axes shared by every view_modification_per_fragment call below.
fig, ax = plt.subplots(1, 1, figsize=(20, 10))
# Rotate the x tick labels of the current axes by 90 degrees.
plt.xticks(rotation=90)


### IVT ####
# 18S fragment of the IVT_18S run, drawn in red with markerstyle "*" on the shared fig/ax.
view_modification_per_fragment(
    bamfile_path="~/directRNA_004/IVT_18S/filtered_pod5/filtered_pod5_basecalled.bam",
    fragment_df_path="~/directRNA_004/IVT_18S/template_based_analysis/template_fragment_df.csv",
    fragment_to_analyse="18S",
    color="red",
    markerstyle="*",
    reference_path="~/wf-nanoribolyzer/references/RNA45SN1.fasta",
    literature_mod_df_path="~/wf-nanoribolyzer/references/rRNA_modifications_conv.bed",
    condition="IVT_18S",
    fig=fig,
    ax=ax,
    modification_csv_path="~/wf-nanoribolyzer/jupyter_notebooks/IVT_18S_modification_quantification.csv",
)
### 47S ####
# One call per (run directory, sample label, colour), in the original order.
# All calls use fragment "47S", markerstyle "*", the same reference and
# literature table, and draw onto the shared fig/ax.
# NOTE(review): several samples share a colour and marker (SN2_2/SN2_3/SN3_2/
# SN3_3 are all "violet", SN2_1/SN3_1 both "green") - confirm this is intended.
for _run_dir, _sample, _color in (
    ("20250218_SN1_1", "SN1_1", "palevioletred"),
    ("20250218_SN1_3", "SN1_3", "blue"),
    ("20250319_SN2_1", "SN2_1", "green"),
    ("20250409_SN2_2", "SN2_2", "violet"),
    ("20250409_SN2_3", "SN2_3", "violet"),
    ("20250218_SN3_1", "SN3_1", "green"),
    ("20250409_SN3_2", "SN3_2", "violet"),
    ("20250218_SN3_3", "SN3_3", "violet"),
):
    view_modification_per_fragment(
        bamfile_path=f"~/directRNA_004/{_run_dir}/filtered_pod5/filtered_pod5_basecalled.bam",
        fragment_df_path=f"~/directRNA_004/{_run_dir}/template_based_analysis/template_fragment_df.csv",
        fragment_to_analyse="47S",
        color=_color,
        markerstyle="*",
        reference_path="~/wf-nanoribolyzer/references/RNA45SN1.fasta",
        literature_mod_df_path="~/wf-nanoribolyzer/references/rRNA_modifications_conv.bed",
        condition=f"{_sample}_47S",
        fig=fig,
        ax=ax,
        modification_csv_path=f"~/wf-nanoribolyzer/jupyter_notebooks/{_sample}_47S_modification_quantification.csv",
    )


### 45S ####
# 45S fragment for the SN1_1 and SN1_3 runs, markerstyle "v", original order.
for _run_dir, _sample, _color in (
    ("20250218_SN1_1", "SN1_1", "palevioletred"),
    ("20250218_SN1_3", "SN1_3", "blue"),
):
    view_modification_per_fragment(
        bamfile_path=f"~/directRNA_004/{_run_dir}/filtered_pod5/filtered_pod5_basecalled.bam",
        fragment_df_path=f"~/directRNA_004/{_run_dir}/template_based_analysis/template_fragment_df.csv",
        fragment_to_analyse="45S",
        color=_color,
        markerstyle="v",
        reference_path="~/wf-nanoribolyzer/references/RNA45SN1.fasta",
        literature_mod_df_path="~/wf-nanoribolyzer/references/rRNA_modifications_conv.bed",
        condition=f"{_sample}_45S",
        fig=fig,
        ax=ax,
        modification_csv_path=f"~/wf-nanoribolyzer/jupyter_notebooks/{_sample}_45S_modification_quantification.csv",
    )






#### 43S ####
# 43S fragment for the three SN3 runs, markerstyle "*", original order.
# NOTE(review): modification_csv_path is an empty string for these calls,
# unlike the 47S/45S/30S calls in this cell - confirm that is intended.
for _run_dir, _sample, _color in (
    ("20250218_SN3_1", "SN3_1", "green"),
    ("20250409_SN3_2", "SN3_2", "violet"),
    ("20250218_SN3_3", "SN3_3", "violet"),
):
    view_modification_per_fragment(
        bamfile_path=f"~/directRNA_004/{_run_dir}/filtered_pod5/filtered_pod5_basecalled.bam",
        fragment_df_path=f"~/directRNA_004/{_run_dir}/template_based_analysis/template_fragment_df.csv",
        fragment_to_analyse="43S",
        color=_color,
        markerstyle="*",
        reference_path="~/wf-nanoribolyzer/references/RNA45SN1.fasta",
        literature_mod_df_path="~/wf-nanoribolyzer/references/rRNA_modifications_conv.bed",
        condition=f"{_sample}_43S",
        fig=fig,
        ax=ax,
        modification_csv_path="",
    )




#### 41S ####
# 41S fragment for the three SN3 runs, markerstyle "*", original order.
# NOTE(review): modification_csv_path is an empty string for these calls,
# unlike the 47S/45S/30S calls in this cell - confirm that is intended.
for _run_dir, _sample, _color in (
    ("20250218_SN3_1", "SN3_1", "green"),
    ("20250409_SN3_2", "SN3_2", "violet"),
    ("20250218_SN3_3", "SN3_3", "violet"),
):
    view_modification_per_fragment(
        bamfile_path=f"~/directRNA_004/{_run_dir}/filtered_pod5/filtered_pod5_basecalled.bam",
        fragment_df_path=f"~/directRNA_004/{_run_dir}/template_based_analysis/template_fragment_df.csv",
        fragment_to_analyse="41S",
        color=_color,
        markerstyle="*",
        reference_path="~/wf-nanoribolyzer/references/RNA45SN1.fasta",
        literature_mod_df_path="~/wf-nanoribolyzer/references/rRNA_modifications_conv.bed",
        condition=f"{_sample}_41S",
        fig=fig,
        ax=ax,
        modification_csv_path="",
    )



# 45S fragment for the SN2 and SN3 runs (these calls analyse "45S", not 41S).
# Marker is per sample: the SN2 runs use "*", the SN3 runs use "v".
# NOTE(review): the SN1 45S calls in this cell use "v" - confirm "*" for SN2 is intended.
for _run_dir, _sample, _color, _marker in (
    ("20250319_SN2_1", "SN2_1", "green", "*"),
    ("20250409_SN2_2", "SN2_2", "violet", "*"),
    ("20250409_SN2_3", "SN2_3", "violet", "*"),
    ("20250218_SN3_1", "SN3_1", "green", "v"),
    ("20250409_SN3_2", "SN3_2", "violet", "v"),
    ("20250218_SN3_3", "SN3_3", "violet", "v"),
):
    view_modification_per_fragment(
        bamfile_path=f"~/directRNA_004/{_run_dir}/filtered_pod5/filtered_pod5_basecalled.bam",
        fragment_df_path=f"~/directRNA_004/{_run_dir}/template_based_analysis/template_fragment_df.csv",
        fragment_to_analyse="45S",
        color=_color,
        markerstyle=_marker,
        reference_path="~/wf-nanoribolyzer/references/RNA45SN1.fasta",
        literature_mod_df_path="~/wf-nanoribolyzer/references/rRNA_modifications_conv.bed",
        condition=f"{_sample}_45S",
        fig=fig,
        ax=ax,
        modification_csv_path=f"~/wf-nanoribolyzer/jupyter_notebooks/{_sample}_45S_modification_quantification.csv",
    )


### 30S+1 ####
# 30S+1 fragment for every SN run, in the original order. The marker is per
# sample: SN1 and SN3 runs use "s", SN2 runs use "*".
#
# The CSV path is derived from the sample label so each sample gets its own
# file. The hand-written SN3_2 call previously passed the SN3_3 CSV path
# (copy-paste slip), so SN3_2 and SN3_3 shared one quantification file.
# NOTE(review): if the function only reads an existing CSV, make sure
# SN3_2_30S+1_modification_quantification.csv exists or is regenerated.
for _run_dir, _sample, _color, _marker in (
    ("20250218_SN1_1", "SN1_1", "palevioletred", "s"),
    ("20250218_SN1_3", "SN1_3", "blue", "s"),
    ("20250319_SN2_1", "SN2_1", "green", "*"),
    ("20250409_SN2_2", "SN2_2", "violet", "*"),
    ("20250409_SN2_3", "SN2_3", "violet", "*"),
    ("20250218_SN3_1", "SN3_1", "green", "s"),
    ("20250409_SN3_2", "SN3_2", "violet", "s"),
    ("20250218_SN3_3", "SN3_3", "violet", "s"),
):
    view_modification_per_fragment(
        bamfile_path=f"~/directRNA_004/{_run_dir}/filtered_pod5/filtered_pod5_basecalled.bam",
        fragment_df_path=f"~/directRNA_004/{_run_dir}/template_based_analysis/template_fragment_df.csv",
        fragment_to_analyse="30S+1",
        color=_color,
        markerstyle=_marker,
        reference_path="~/wf-nanoribolyzer/references/RNA45SN1.fasta",
        literature_mod_df_path="~/wf-nanoribolyzer/references/rRNA_modifications_conv.bed",
        condition=f"{_sample}_30S+1",
        fig=fig,
        ax=ax,
        modification_csv_path=f"~/wf-nanoribolyzer/jupyter_notebooks/{_sample}_30S+1_modification_quantification.csv",
    )


### 30S ####
# 30S fragment for the SN1_1, SN1_3 and SN2_1 runs, in the original order.
# Marker is per sample: the SN1 runs use "D", SN2_1 uses "*".
for _run_dir, _sample, _color, _marker in (
    ("20250218_SN1_1", "SN1_1", "palevioletred", "D"),
    ("20250218_SN1_3", "SN1_3", "blue", "D"),
    ("20250319_SN2_1", "SN2_1", "green", "*"),
):
    view_modification_per_fragment(
        bamfile_path=f"~/directRNA_004/{_run_dir}/filtered_pod5/filtered_pod5_basecalled.bam",
        fragment_df_path=f"~/directRNA_004/{_run_dir}/template_based_analysis/template_fragment_df.csv",
        fragment_to_analyse="30S",
        color=_color,
        markerstyle=_marker,
        reference_path="~/wf-nanoribolyzer/references/RNA45SN1.fasta",
        literature_mod_df_path="~/wf-nanoribolyzer/references/rRNA_modifications_conv.bed",
        condition=f"{_sample}_30S",
        fig=fig,
        ax=ax,
        modification_csv_path=f"~/wf-nanoribolyzer/jupyter_notebooks/{_sample}_30S_modification_quantification.csv",
    )


view_modification_per_fragment(bamfile_path="~/directRNA_004/20250409_SN2_2/filtered_pod5/filtered_pod5_basecalled.bam",
                   fragment_df_path="~/directRNA_004/20250409_SN2_2/template_based_analysis/template_fragment_df.csv",
                   fragment_to_analyse="30S",
                   color="violet",
                   markerstyle = "*",
                   reference_path="~/wf-nanoribolyzer/references/RNA45SN1.fasta",
                   literature_mod_df_path="~/wf-nanoribolyzer/references/rRNA_modifications_conv.bed",
                   condition = "SN2_2_30S",
                   fig = fig,
                   ax = ax,
                   modification_csv_path="~/wf-nanoribolyzer/jupyter_notebooks/SN2_2_30S_modification_quantification.csv"
                   )

view_modification_per_fragment(bamfile_path="~/directRNA_004/20250409_SN2_3/filtered_pod5/filtered_pod5_basecalled.bam",
                   fragment_df_path="~/directRNA_004/20250409_SN2_3/template_based_analysis/template_fragment_df.csv",
                   fragment_to_analyse="30S",
                   color="violet",
                   markerstyle = "*",
                   reference_path="~/wf-nanoribolyzer/references/RNA45SN1.fasta",
                   literature_mod_df_path="~/wf-nanoribolyzer/references/rRNA_modifications_conv.bed",
                   condition = "SN2_3_30S",
                   fig = fig,
                   ax = ax,
                   modification_csv_path="~/wf-nanoribolyzer/jupyter_notebooks/SN2_3_30S_modification_quantification.csv"
                   )








view_modification_per_fragment(bamfile_path="~/directRNA_004/20250218_SN3_1/filtered_pod5/filtered_pod5_basecalled.bam",
                   fragment_df_path="~/directRNA_004/20250218_SN3_1/template_based_analysis/template_fragment_df.csv",
                   fragment_to_analyse="30S",
                   color="green",
                   markerstyle = "D",
                   reference_path="~/wf-nanoribolyzer/references/RNA45SN1.fasta",
                   literature_mod_df_path="~/wf-nanoribolyzer/references/rRNA_modifications_conv.bed",
                   condition = "SN3_1_30S",
                   fig = fig,
                   ax = ax,
                   modification_csv_path="~/wf-nanoribolyzer/jupyter_notebooks/SN3_1_30S_modification_quantification.csv"
                   )


view_modification_per_fragment(bamfile_path="~/directRNA_004/20250409_SN3_2/filtered_pod5/filtered_pod5_basecalled.bam",
                   fragment_df_path="~/directRNA_004/20250409_SN3_2/template_based_analysis/template_fragment_df.csv",
                   fragment_to_analyse="30S",
                   color="violet",
                   markerstyle = "D",
                   reference_path="~/wf-nanoribolyzer/references/RNA45SN1.fasta",
                   literature_mod_df_path="~/wf-nanoribolyzer/references/rRNA_modifications_conv.bed",
                   condition = "SN3_2_30S",
                   fig = fig,
                   ax = ax,
                   modification_csv_path="~/wf-nanoribolyzer/jupyter_notebooks/SN3_2_30S_modification_quantification.csv"
                   )


view_modification_per_fragment(bamfile_path="~/directRNA_004/20250218_SN3_3/filtered_pod5/filtered_pod5_basecalled.bam",
                   fragment_df_path="~/directRNA_004/20250218_SN3_3/template_based_analysis/template_fragment_df.csv",
                   fragment_to_analyse="30S",
                   color="violet",
                   markerstyle = "D",
                   reference_path="~/wf-nanoribolyzer/references/RNA45SN1.fasta",
                   literature_mod_df_path="~/wf-nanoribolyzer/references/rRNA_modifications_conv.bed",
                   condition = "SN3_3_30S",
                   fig = fig,
                   ax = ax,
                   modification_csv_path="~/wf-nanoribolyzer/jupyter_notebooks/SN3_3_30S_modification_quantification.csv"
                   )


### 26S ####

# One row per sequencing run, in the order the traces are added to the figure:
# (run date, sample id, colour, marker). The run directory is "<date>_<sample>".
_runs_26S = [
    ("20250218", "SN1_1", "palevioletred", "2"),
    ("20250218", "SN1_3", "blue", "2"),
    ("20250319", "SN2_1", "green", "*"),
    ("20250409", "SN2_2", "violet", "*"),
    ("20250409", "SN2_3", "violet", "*"),
    ("20250218", "SN3_1", "green", "2"),
    ("20250409", "SN3_2", "violet", "2"),
    ("20250218", "SN3_3", "violet", "2"),
]

for _date, _sample, _color, _marker in _runs_26S:
    # Same reference / literature files and the shared fig/ax for every run;
    # only the run directory, styling and per-sample labels change.
    view_modification_per_fragment(
        bamfile_path=f"~/directRNA_004/{_date}_{_sample}/filtered_pod5/filtered_pod5_basecalled.bam",
        fragment_df_path=f"~/directRNA_004/{_date}_{_sample}/template_based_analysis/template_fragment_df.csv",
        fragment_to_analyse="26S",
        color=_color,
        markerstyle=_marker,
        reference_path="~/wf-nanoribolyzer/references/RNA45SN1.fasta",
        literature_mod_df_path="~/wf-nanoribolyzer/references/rRNA_modifications_conv.bed",
        condition=f"{_sample}_26S",
        fig=fig,
        ax=ax,
        modification_csv_path=f"~/wf-nanoribolyzer/jupyter_notebooks/{_sample}_26S_modification_quantification.csv",
    )


### 21S ####

# One row per sequencing run, in the order the traces are added to the figure:
# (run date, sample id, colour, marker). The run directory is "<date>_<sample>".
_runs_21S = [
    ("20250218", "SN1_1", "palevioletred", "X"),
    ("20250218", "SN1_3", "blue", "X"),
    ("20250319", "SN2_1", "green", "*"),
    ("20250409", "SN2_2", "violet", "*"),
    ("20250409", "SN2_3", "violet", "*"),
    ("20250218", "SN3_1", "green", "X"),
    ("20250409", "SN3_2", "violet", "X"),
    ("20250218", "SN3_3", "violet", "X"),
]

for _date, _sample, _color, _marker in _runs_21S:
    # Same reference / literature files and the shared fig/ax for every run;
    # only the run directory, styling and per-sample labels change.
    view_modification_per_fragment(
        bamfile_path=f"~/directRNA_004/{_date}_{_sample}/filtered_pod5/filtered_pod5_basecalled.bam",
        fragment_df_path=f"~/directRNA_004/{_date}_{_sample}/template_based_analysis/template_fragment_df.csv",
        fragment_to_analyse="21S",
        color=_color,
        markerstyle=_marker,
        reference_path="~/wf-nanoribolyzer/references/RNA45SN1.fasta",
        literature_mod_df_path="~/wf-nanoribolyzer/references/rRNA_modifications_conv.bed",
        condition=f"{_sample}_21S",
        fig=fig,
        ax=ax,
        modification_csv_path=f"~/wf-nanoribolyzer/jupyter_notebooks/{_sample}_21S_modification_quantification.csv",
    )


### 21S-C ####

# One row per sequencing run, in the order the traces are added to the figure:
# (run date, sample id, colour, marker). The run directory is "<date>_<sample>".
_runs_21S_C = [
    ("20250218", "SN1_1", "palevioletred", "P"),
    ("20250218", "SN1_3", "blue", "P"),
    ("20250319", "SN2_1", "green", "*"),
    ("20250409", "SN2_2", "violet", "*"),
    ("20250409", "SN2_3", "violet", "*"),
    ("20250218", "SN3_1", "green", "P"),
    ("20250409", "SN3_2", "violet", "P"),
    ("20250218", "SN3_3", "violet", "P"),
]

for _date, _sample, _color, _marker in _runs_21S_C:
    # The condition label is always derived from the sample id and the fragment
    # name, so every run of this section is labelled "<sample>_21S-C"
    # (the SN1_3 run used to be passed the unrelated label "SN1_3_32S").
    view_modification_per_fragment(
        bamfile_path=f"~/directRNA_004/{_date}_{_sample}/filtered_pod5/filtered_pod5_basecalled.bam",
        fragment_df_path=f"~/directRNA_004/{_date}_{_sample}/template_based_analysis/template_fragment_df.csv",
        fragment_to_analyse="21S-C",
        color=_color,
        markerstyle=_marker,
        reference_path="~/wf-nanoribolyzer/references/RNA45SN1.fasta",
        literature_mod_df_path="~/wf-nanoribolyzer/references/rRNA_modifications_conv.bed",
        condition=f"{_sample}_21S-C",
        fig=fig,
        ax=ax,
        modification_csv_path=f"~/wf-nanoribolyzer/jupyter_notebooks/{_sample}_21S-C_modification_quantification.csv",
    )


### 18S-E ####

# One row per sequencing run, in the order the traces are added to the figure:
# (run date, sample id, colour, modification_csv_path).
# The SN1_2 run gets its own 18S-E file name; it previously pointed at the
# 18S file "SN1_2_18S_modification_quantification.csv", which is also the
# path passed for the SN1_2 18S fragment.
# NOTE(review): SN1_1 and SN1_3 pass an empty CSV path, kept unchanged here.
# How view_modification_per_fragment treats "" is not visible in this cell —
# confirm whether these two should also get "<sample>_18S-E_..." file names.
_runs_18S_E = [
    ("20250218", "SN1_1", "palevioletred", ""),
    ("20250409", "SN1_2", "blue",
     "~/wf-nanoribolyzer/jupyter_notebooks/SN1_2_18S-E_modification_quantification.csv"),
    ("20250218", "SN1_3", "blue", ""),
]

for _date, _sample, _color, _csv_path in _runs_18S_E:
    view_modification_per_fragment(
        bamfile_path=f"~/directRNA_004/{_date}_{_sample}/filtered_pod5/filtered_pod5_basecalled.bam",
        fragment_df_path=f"~/directRNA_004/{_date}_{_sample}/template_based_analysis/template_fragment_df.csv",
        fragment_to_analyse="18S-E",
        color=_color,
        markerstyle="o",
        reference_path="~/wf-nanoribolyzer/references/RNA45SN1.fasta",
        literature_mod_df_path="~/wf-nanoribolyzer/references/rRNA_modifications_conv.bed",
        condition=f"{_sample}_18S-E",
        fig=fig,
        ax=ax,
        modification_csv_path=_csv_path,
    )







### 18S ####

# One row per sequencing run, in the order the traces are added to the figure:
# (run date, sample id, colour, marker). The run directory is "<date>_<sample>".
_runs_18S = [
    ("20250218", "SN1_1", "palevioletred", "o"),
    ("20250409", "SN1_2", "blue", "o"),
    ("20250218", "SN1_3", "blue", "o"),
    ("20250319", "SN2_1", "green", "*"),
    ("20250409", "SN2_2", "violet", "*"),
    ("20250409", "SN2_3", "violet", "*"),
    ("20250218", "SN3_1", "green", "o"),
    ("20250409", "SN3_2", "violet", "o"),
    ("20250218", "SN3_3", "violet", "o"),
]

for _date, _sample, _color, _marker in _runs_18S:
    # Same reference / literature files and the shared fig/ax for every run;
    # only the run directory, styling and per-sample labels change.
    view_modification_per_fragment(
        bamfile_path=f"~/directRNA_004/{_date}_{_sample}/filtered_pod5/filtered_pod5_basecalled.bam",
        fragment_df_path=f"~/directRNA_004/{_date}_{_sample}/template_based_analysis/template_fragment_df.csv",
        fragment_to_analyse="18S",
        color=_color,
        markerstyle=_marker,
        reference_path="~/wf-nanoribolyzer/references/RNA45SN1.fasta",
        literature_mod_df_path="~/wf-nanoribolyzer/references/rRNA_modifications_conv.bed",
        condition=f"{_sample}_18S",
        fig=fig,
        ax=ax,
        modification_csv_path=f"~/wf-nanoribolyzer/jupyter_notebooks/{_sample}_18S_modification_quantification.csv",
    )


# Final cosmetics for the shared axes: labels, y-range, guide lines, legend.
ax.set_xlabel("PseU position on 45S")
ax.set_ylabel("Modification frequency")
ax.set_ylim(0, 1.1)

# Dashed guide lines at every current major tick position of each axis.
for tick in ax.get_xticks():
    ax.axvline(tick, color='black', linestyle='--', linewidth=1, alpha=.5)

for tick in ax.get_yticks():
    ax.axhline(tick, color='gray', linestyle='--', linewidth=1, alpha=.2)

# Single legend call: anchor the legend's centre-left corner to the right edge
# of the axes, at mid height. (An earlier 'upper right' legend call was
# redundant because a later ax.legend() replaces the axes' legend.)
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))

In [83]:
import seaborn as sns
def _read_modification_quantification(sample_fragment: str, condition: str) -> pd.DataFrame:
    """Read one per-fragment modification table and tag it with its condition.

    Args:
        sample_fragment: File-name prefix, e.g. ``"SN3_1_47S"``; the file read is
            ``./<sample_fragment>_modification_quantification.csv`` (``;``-separated,
            first row is the header).
        condition: Value written to every row of the new ``"Condition"`` column.

    Returns:
        The loaded table with the added ``"Condition"`` column.
    """
    modification_df = pd.read_csv(f"./{sample_fragment}_modification_quantification.csv", sep=";", header=0)
    modification_df["Condition"] = condition
    return modification_df


# One frame per replicate (1-3) and fragment. The three replicates of a fragment
# share one "Condition" label so that they are pooled when plotted by condition.
SN3_1_47S_mod_df = _read_modification_quantification("SN3_1_47S", "47S SN3")
SN3_2_47S_mod_df = _read_modification_quantification("SN3_2_47S", "47S SN3")
SN3_3_47S_mod_df = _read_modification_quantification("SN3_3_47S", "47S SN3")

SN3_1_45S_mod_df = _read_modification_quantification("SN3_1_45S", "45S SN3")
SN3_2_45S_mod_df = _read_modification_quantification("SN3_2_45S", "45S SN3")
SN3_3_45S_mod_df = _read_modification_quantification("SN3_3_45S", "45S SN3")

SN3_1_43S_mod_df = _read_modification_quantification("SN3_1_43S", "43S SN3")
SN3_2_43S_mod_df = _read_modification_quantification("SN3_2_43S", "43S SN3")
SN3_3_43S_mod_df = _read_modification_quantification("SN3_3_43S", "43S SN3")

SN3_1_41S_mod_df = _read_modification_quantification("SN3_1_41S", "41S SN3")
SN3_2_41S_mod_df = _read_modification_quantification("SN3_2_41S", "41S SN3")
SN3_3_41S_mod_df = _read_modification_quantification("SN3_3_41S", "41S SN3")

SN3_1_30S_1_mod_df = _read_modification_quantification("SN3_1_30S+1", "30S+1 SN3")
SN3_2_30S_1_mod_df = _read_modification_quantification("SN3_2_30S+1", "30S+1 SN3")
SN3_3_30S_1_mod_df = _read_modification_quantification("SN3_3_30S+1", "30S+1 SN3")

SN3_1_30S_mod_df = _read_modification_quantification("SN3_1_30S", "30S SN3")
SN3_2_30S_mod_df = _read_modification_quantification("SN3_2_30S", "30S SN3")
SN3_3_30S_mod_df = _read_modification_quantification("SN3_3_30S", "30S SN3")

SN3_1_26S_mod_df = _read_modification_quantification("SN3_1_26S", "26S SN3")
SN3_2_26S_mod_df = _read_modification_quantification("SN3_2_26S", "26S SN3")
SN3_3_26S_mod_df = _read_modification_quantification("SN3_3_26S", "26S SN3")

SN3_1_21S_mod_df = _read_modification_quantification("SN3_1_21S", "21S SN3")
SN3_2_21S_mod_df = _read_modification_quantification("SN3_2_21S", "21S SN3")
SN3_3_21S_mod_df = _read_modification_quantification("SN3_3_21S", "21S SN3")

SN3_1_21S_C_mod_df = _read_modification_quantification("SN3_1_21S-C", "21S-C SN3")
SN3_2_21S_C_mod_df = _read_modification_quantification("SN3_2_21S-C", "21S-C SN3")
SN3_3_21S_C_mod_df = _read_modification_quantification("SN3_3_21S-C", "21S-C SN3")

SN3_1_18S_E_mod_df = _read_modification_quantification("SN3_1_18S-E", "18S-E SN3")
SN3_2_18S_E_mod_df = _read_modification_quantification("SN3_2_18S-E", "18S-E SN3")
SN3_3_18S_E_mod_df = _read_modification_quantification("SN3_3_18S-E", "18S-E SN3")

SN3_1_18S_mod_df = _read_modification_quantification("SN3_1_18S", "18S SN3")
SN3_2_18S_mod_df = _read_modification_quantification("SN3_2_18S", "18S SN3")
SN3_3_18S_mod_df = _read_modification_quantification("SN3_3_18S", "18S SN3")

SN1_1_18S_E_mod_df = _read_modification_quantification("SN1_1_18S-E", "18S-E SN1")
SN1_2_18S_E_mod_df = _read_modification_quantification("SN1_2_18S-E", "18S-E SN1")
SN1_3_18S_E_mod_df = _read_modification_quantification("SN1_3_18S-E", "18S-E SN1")

SN1_1_18S_mod_df = _read_modification_quantification("SN1_1_18S", "18S SN1")
SN1_2_18S_mod_df = _read_modification_quantification("SN1_2_18S", "18S SN1")
SN1_3_18S_mod_df = _read_modification_quantification("SN1_3_18S", "18S SN1")

IVT_mod_df = _read_modification_quantification("IVT_18S", "IVT")

# Frames handed to the box-plot function, in plotting order.
# NOTE(review): SN3_2_47S_mod_df and all 43S / 41S frames are loaded above but
# are not part of this list - confirm that the omission is intentional.
array_of_modification_dfs = [
    SN3_1_47S_mod_df,
    SN3_3_47S_mod_df,
    SN3_1_45S_mod_df,
    SN3_2_45S_mod_df,
    SN3_3_45S_mod_df,
    SN3_1_30S_1_mod_df,
    SN3_2_30S_1_mod_df,
    SN3_3_30S_1_mod_df,
    SN3_1_30S_mod_df,
    SN3_2_30S_mod_df,
    SN3_3_30S_mod_df,
    SN3_1_26S_mod_df,
    SN3_2_26S_mod_df,
    SN3_3_26S_mod_df,
    SN3_1_21S_mod_df,
    SN3_2_21S_mod_df,
    SN3_3_21S_mod_df,
    SN3_1_21S_C_mod_df,
    SN3_2_21S_C_mod_df,
    SN3_3_21S_C_mod_df,
    SN1_1_18S_E_mod_df,
    SN1_2_18S_E_mod_df,
    SN1_3_18S_E_mod_df,
    SN3_1_18S_E_mod_df,
    SN3_2_18S_E_mod_df,
    SN3_3_18S_E_mod_df,
    SN1_1_18S_mod_df,
    SN1_2_18S_mod_df,
    SN1_3_18S_mod_df,
    SN3_1_18S_mod_df,
    SN3_2_18S_mod_df,
    SN3_3_18S_mod_df,
    IVT_mod_df,
]



def boxplot_modification_frequencies_per_fragment(array_of_modification_dfs,literature_mod_path, metric:str="rel_n_C_and_PseU"):
    """Box-plot ``metric`` per condition at the literature pseudouridine sites.

    The literature table is read as a header-less, tab-separated file with seven
    columns; only rows whose ``modification`` equals ``"psu"`` are used. All
    frames in ``array_of_modification_dfs`` are concatenated, and the rows whose
    ``position`` equals the ``end`` coordinate of a psu site are drawn in two
    stacked panels: sites strictly between 3670 and 4518 on top, sites strictly
    between 4518 and 5347 below.

    Args:
        array_of_modification_dfs: Iterable of DataFrames providing the columns
            ``"position"``, ``"Condition"`` and ``metric``.
        literature_mod_path: Path of the tab-separated literature modification table.
        metric: Column plotted on the y axis.

    Returns:
        ``(fig, final_modification_df)``: the matplotlib figure and the rows
        plotted in the second (lower) panel.
    """
    # NOTE(review): the window limits look like reference coordinates that split
    # one rRNA region in two - confirm. A site at exactly 4518 is in neither panel.
    first_window = (3670, 4518)
    second_window = (4518, 5347)

    literature_mod_df = pd.read_csv(literature_mod_path, sep="\t", header=None, index_col=None) #"~/wf-nanoribolyzer/references/rRNA_modifications_conv.bed"
    literature_mod_df.columns = ["reference","start","end","modification","A","B","C"]
    psu_ends = literature_mod_df.loc[literature_mod_df["modification"] == "psu", "end"]

    # Concatenate once instead of growing a frame inside a loop.
    modification_dfs = list(array_of_modification_dfs)
    concat_modifications_df = pd.concat(modification_dfs, axis=0) if modification_dfs else pd.DataFrame()

    def _rows_at_psu_sites(lower, upper):
        # Rows located on a psu site with lower < position < upper, grouped in
        # the order in which the sites appear in the literature table.
        per_site_frames = [
            concat_modifications_df[concat_modifications_df["position"].isin([end])]
            for end in psu_ends
            if lower < end < upper
        ]
        if not per_site_frames:
            # Keep the column layout so the plotting call sees the expected names.
            return concat_modifications_df.iloc[0:0]
        return pd.concat(per_site_frames, axis=0)

    def _draw_panel(ax, panel_df):
        # One box per position and condition; legend pushed past the right edge.
        sns.boxplot(data=panel_df, x="position", y=metric, hue="Condition", ax=ax)
        ax.legend(loc="upper right", bbox_to_anchor=[1.1, 1])

    def _draw_vertical_guides(ax):
        # Opaque dashed lines every 1 x-unit and faint ones every 0.1 x-unit,
        # both starting at the left axis limit; x-ticks are left untouched.
        xmin, xmax = ax.get_xlim()
        for x in np.arange(xmin, xmax, 1):
            ax.axvline(x=x, color='gray', linestyle='--', alpha=1)
        for x in np.arange(xmin, xmax, 0.1):
            ax.axvline(x=x, color='gray', linestyle='--', alpha=0.1)

    fig, axs = plt.subplots(ncols=1, nrows=2, figsize=(20, 15))

    first_panel_df = _rows_at_psu_sites(*first_window)
    print(first_panel_df.columns)
    _draw_panel(axs[0], first_panel_df)

    final_modification_df = _rows_at_psu_sites(*second_window)
    _draw_panel(axs[1], final_modification_df)

    fig.tight_layout()
    for ax in axs:
        _draw_vertical_guides(ax)
    return fig, final_modification_df

   
    
    
# Box plots for the function's default metric column.
fig, final_mod_df = boxplot_modification_frequencies_per_fragment(
    array_of_modification_dfs,
    literature_mod_path="~/wf-nanoribolyzer/references/rRNA_modifications_conv.bed",
)
# plt.savefig("./18S_E_SN3_dRNA_modification_per_fragment_boxplots.svg")

# Same plot for the "rel_n_pseU" metric column.
fig2, final_mod_n_pseU_df = boxplot_modification_frequencies_per_fragment(
    array_of_modification_dfs,
    literature_mod_path="~/wf-nanoribolyzer/references/rRNA_modifications_conv.bed",
    metric="rel_n_pseU",
)
# plt.savefig("./18S_E_SN3_dRNA_modification_PseU_only_per_fragment_boxplots.svg")

In [None]:
def plot_correlation_matrix(df:pd.DataFrame, modification_reference_df_path:str, condition_name:str):
    """Plot and save the site-by-site correlation of ``rel_n_pseU`` across conditions.

    ``df`` is inner-joined with the ``"psu"`` rows of the reference table
    (``position`` == ``End``), pivoted to one row per ``Condition`` and one
    column per site (missing values filled with 0), and the column-wise
    correlation matrix is drawn as a heat map with a colour range of -1..1.

    Args:
        df: Table with the columns ``"position"``, ``"Condition"`` and
            ``"rel_n_pseU"``. Each (Condition, site) pair must occur only once,
            otherwise the pivot raises.
        modification_reference_df_path: Tab-separated, header-less reference
            table; its first four columns are used as template, start, end and
            modification.
        condition_name: Suffix of the output files
            ``~/Figure/correlation_matrix_<condition_name>.svg`` and ``.png``.
    """
    # header=None: the table has no header row, so the first record must be kept
    # as data. Only the first four columns are needed here.
    modification_reference_df = pd.read_csv(modification_reference_df_path, sep="\t", header=None).iloc[:, :4]
    modification_reference_df.columns = ["Template","Start","End","Modification"]
    modification_reference_df = modification_reference_df[modification_reference_df["Modification"] == "psu"]

    merged_df = pd.merge(df, modification_reference_df, how="inner", left_on="position", right_on="End")
    merged_df["rel_n_pseU"] = merged_df["rel_n_pseU"].astype(float)

    df_pivot = merged_df.pivot(index='Condition', columns='End', values='rel_n_pseU').fillna(0)
    df_corr = df_pivot.corr()

    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(30, 20))
    sns.heatmap(df_corr, vmin=-1, vmax=1, ax=ax, cmap="viridis")
    fig.show()

    # Expand "~" explicitly so the files are written below the home directory
    # rather than relying on the plotting library to interpret it.
    figure_dir = Path("~/Figure").expanduser()
    fig.savefig(figure_dir / f"correlation_matrix_{condition_name}.svg")
    fig.savefig(figure_dir / f"correlation_matrix_{condition_name}.png")
    



In [141]:
# Give every replicate its own Condition label on a copy (DataFrame.assign):
# the shared frames keep their pooled label and re-running this cell cannot
# stack suffixes ("... 1 1").
plot_correlation_matrix(
    pd.concat(
        [
            SN3_1_47S_mod_df.assign(Condition="47S SN3 1"),
            SN3_2_47S_mod_df.assign(Condition="47S SN3 2"),
            SN3_3_47S_mod_df.assign(Condition="47S SN3 3"),
        ],
        axis=0,
    ),
    "~/wf-nanoribolyzer/references/rRNA_modifications_conv.bed",
    "SN3_47S",
)

In [142]:
# plot_correlation_matrix pivots on "Condition", so the three replicates need
# distinct labels. They are set on copies (DataFrame.assign) to keep the shared
# frames unchanged and the cell re-runnable.
plot_correlation_matrix(
    pd.concat(
        [
            SN3_1_45S_mod_df.assign(Condition="45S SN3 1"),
            SN3_2_45S_mod_df.assign(Condition="45S SN3 2"),
            SN3_3_45S_mod_df.assign(Condition="45S SN3 3"),
        ],
        axis=0,
    ),
    "~/wf-nanoribolyzer/references/rRNA_modifications_conv.bed",
    "SN3_45S",
)



In [143]:
# plot_correlation_matrix pivots on "Condition", so the three replicates need
# distinct labels. They are set on copies (DataFrame.assign) to keep the shared
# frames unchanged and the cell re-runnable.
plot_correlation_matrix(
    pd.concat(
        [
            SN3_1_30S_1_mod_df.assign(Condition="30S+1 SN3 1"),
            SN3_2_30S_1_mod_df.assign(Condition="30S+1 SN3 2"),
            SN3_3_30S_1_mod_df.assign(Condition="30S+1 SN3 3"),
        ],
        axis=0,
    ),
    "~/wf-nanoribolyzer/references/rRNA_modifications_conv.bed",
    "SN3_30S_1",
)

In [144]:
# Give every replicate its own Condition label on a copy (DataFrame.assign):
# the shared frames keep their pooled label and re-running this cell cannot
# stack suffixes ("... 1 1").
plot_correlation_matrix(
    pd.concat(
        [
            SN3_1_26S_mod_df.assign(Condition="26S SN3 1"),
            SN3_2_26S_mod_df.assign(Condition="26S SN3 2"),
            SN3_3_26S_mod_df.assign(Condition="26S SN3 3"),
        ],
        axis=0,
    ),
    "~/wf-nanoribolyzer/references/rRNA_modifications_conv.bed",
    "SN3_26S",
)

In [145]:
# Give every replicate its own Condition label on a copy (DataFrame.assign):
# the shared frames keep their pooled label and re-running this cell cannot
# stack suffixes ("... 1 1").
plot_correlation_matrix(
    pd.concat(
        [
            SN3_1_30S_mod_df.assign(Condition="30S SN3 1"),
            SN3_2_30S_mod_df.assign(Condition="30S SN3 2"),
            SN3_3_30S_mod_df.assign(Condition="30S SN3 3"),
        ],
        axis=0,
    ),
    "~/wf-nanoribolyzer/references/rRNA_modifications_conv.bed",
    "SN3_30S",
)

In [146]:
# Give every replicate its own Condition label on a copy (DataFrame.assign):
# the shared frames keep their pooled label and re-running this cell cannot
# stack suffixes ("... 1 1").
plot_correlation_matrix(
    pd.concat(
        [
            SN3_1_21S_C_mod_df.assign(Condition="21S-C SN3 1"),
            SN3_2_21S_C_mod_df.assign(Condition="21S-C SN3 2"),
            SN3_3_21S_C_mod_df.assign(Condition="21S-C SN3 3"),
        ],
        axis=0,
    ),
    "~/wf-nanoribolyzer/references/rRNA_modifications_conv.bed",
    "SN3_21S_C",
)

In [147]:
# plot_correlation_matrix pivots on "Condition", so the three replicates need
# distinct labels. They are set on copies (DataFrame.assign) to keep the shared
# frames unchanged and the cell re-runnable.
plot_correlation_matrix(
    pd.concat(
        [
            SN3_1_21S_mod_df.assign(Condition="21S SN3 1"),
            SN3_2_21S_mod_df.assign(Condition="21S SN3 2"),
            SN3_3_21S_mod_df.assign(Condition="21S SN3 3"),
        ],
        axis=0,
    ),
    "~/wf-nanoribolyzer/references/rRNA_modifications_conv.bed",
    "SN3_21S",
)

In [148]:
# Give every replicate its own Condition label on a copy (DataFrame.assign):
# the shared frames keep their pooled label and re-running this cell cannot
# stack suffixes ("... 1 1").
plot_correlation_matrix(
    pd.concat(
        [
            SN3_1_18S_E_mod_df.assign(Condition="18S-E SN3 1"),
            SN3_2_18S_E_mod_df.assign(Condition="18S-E SN3 2"),
            SN3_3_18S_E_mod_df.assign(Condition="18S-E SN3 3"),
        ],
        axis=0,
    ),
    "~/wf-nanoribolyzer/references/rRNA_modifications_conv.bed",
    "SN3_18S_E",
)

In [149]:
# Give every replicate its own Condition label on a copy (DataFrame.assign):
# the shared frames keep their pooled label and re-running this cell cannot
# stack suffixes ("... 1 1").
plot_correlation_matrix(
    pd.concat(
        [
            SN1_1_18S_E_mod_df.assign(Condition="18S-E SN1 1"),
            SN1_2_18S_E_mod_df.assign(Condition="18S-E SN1 2"),
            SN1_3_18S_E_mod_df.assign(Condition="18S-E SN1 3"),
        ],
        axis=0,
    ),
    "~/wf-nanoribolyzer/references/rRNA_modifications_conv.bed",
    "SN1_18S_E",
)

In [150]:
# Give every replicate its own Condition label on a copy (DataFrame.assign):
# the shared frames keep their pooled label and re-running this cell cannot
# stack suffixes ("... 1 1").
plot_correlation_matrix(
    pd.concat(
        [
            SN1_1_18S_mod_df.assign(Condition="18S SN1 1"),
            SN1_2_18S_mod_df.assign(Condition="18S SN1 2"),
            SN1_3_18S_mod_df.assign(Condition="18S SN1 3"),
        ],
        axis=0,
    ),
    "~/wf-nanoribolyzer/references/rRNA_modifications_conv.bed",
    "SN1_18S",
)

In [151]:
# Give every replicate its own Condition label on a copy (DataFrame.assign):
# the shared frames keep their pooled label and re-running this cell cannot
# stack suffixes ("... 1 1").
plot_correlation_matrix(
    pd.concat(
        [
            SN3_1_18S_mod_df.assign(Condition="18S SN3 1"),
            SN3_2_18S_mod_df.assign(Condition="18S SN3 2"),
            SN3_3_18S_mod_df.assign(Condition="18S SN3 3"),
        ],
        axis=0,
    ),
    "~/wf-nanoribolyzer/references/rRNA_modifications_conv.bed",
    "SN3_18S",
)

In [10]:
import seaborn as sns
# NOTE(review): the returned palette is discarded here, so this line on its own
# does not appear to change plot colours - confirm whether sns.set_palette("pastel")
# was intended.
sns.color_palette("pastel")


def _read_modification_quantification(sample_fragment: str, condition: str) -> pd.DataFrame:
    """Read one per-fragment modification table and tag it with its condition.

    Args:
        sample_fragment: File-name prefix, e.g. ``"SN2_1_47S"``; the file read is
            ``./<sample_fragment>_modification_quantification.csv`` (``;``-separated,
            first row is the header).
        condition: Value written to every row of the new ``"Condition"`` column.

    Returns:
        The loaded table with the added ``"Condition"`` column.
    """
    modification_df = pd.read_csv(f"./{sample_fragment}_modification_quantification.csv", sep=";", header=0)
    modification_df["Condition"] = condition
    return modification_df


# One frame per replicate (1-3) and fragment. The three replicates of a fragment
# share one "Condition" label so that they are pooled when plotted by condition.
SN2_1_47S_mod_df = _read_modification_quantification("SN2_1_47S", "47S SN2")
SN2_2_47S_mod_df = _read_modification_quantification("SN2_2_47S", "47S SN2")
SN2_3_47S_mod_df = _read_modification_quantification("SN2_3_47S", "47S SN2")

SN2_1_45S_mod_df = _read_modification_quantification("SN2_1_45S", "45S SN2")
SN2_2_45S_mod_df = _read_modification_quantification("SN2_2_45S", "45S SN2")
SN2_3_45S_mod_df = _read_modification_quantification("SN2_3_45S", "45S SN2")

SN2_1_30S_1_mod_df = _read_modification_quantification("SN2_1_30S+1", "30S+1 SN2")
SN2_2_30S_1_mod_df = _read_modification_quantification("SN2_2_30S+1", "30S+1 SN2")
SN2_3_30S_1_mod_df = _read_modification_quantification("SN2_3_30S+1", "30S+1 SN2")

SN2_1_30S_mod_df = _read_modification_quantification("SN2_1_30S", "30S SN2")
SN2_2_30S_mod_df = _read_modification_quantification("SN2_2_30S", "30S SN2")
SN2_3_30S_mod_df = _read_modification_quantification("SN2_3_30S", "30S SN2")

SN2_1_26S_mod_df = _read_modification_quantification("SN2_1_26S", "26S SN2")
SN2_2_26S_mod_df = _read_modification_quantification("SN2_2_26S", "26S SN2")
SN2_3_26S_mod_df = _read_modification_quantification("SN2_3_26S", "26S SN2")

SN2_1_21S_mod_df = _read_modification_quantification("SN2_1_21S", "21S SN2")
SN2_2_21S_mod_df = _read_modification_quantification("SN2_2_21S", "21S SN2")
SN2_3_21S_mod_df = _read_modification_quantification("SN2_3_21S", "21S SN2")

SN2_1_21S_C_mod_df = _read_modification_quantification("SN2_1_21S-C", "21S-C SN2")
SN2_2_21S_C_mod_df = _read_modification_quantification("SN2_2_21S-C", "21S-C SN2")
SN2_3_21S_C_mod_df = _read_modification_quantification("SN2_3_21S-C", "21S-C SN2")

SN2_1_18S_E_mod_df = _read_modification_quantification("SN2_1_18S-E", "18S-E SN2")
SN2_2_18S_E_mod_df = _read_modification_quantification("SN2_2_18S-E", "18S-E SN2")
SN2_3_18S_E_mod_df = _read_modification_quantification("SN2_3_18S-E", "18S-E SN2")

SN2_1_18S_mod_df = _read_modification_quantification("SN2_1_18S", "18S SN2")
SN2_2_18S_mod_df = _read_modification_quantification("SN2_2_18S", "18S SN2")
SN2_3_18S_mod_df = _read_modification_quantification("SN2_3_18S", "18S SN2")

SN1_1_18S_E_mod_df = _read_modification_quantification("SN1_1_18S-E", "18S-E SN1")
SN1_2_18S_E_mod_df = _read_modification_quantification("SN1_2_18S-E", "18S-E SN1")
SN1_3_18S_E_mod_df = _read_modification_quantification("SN1_3_18S-E", "18S-E SN1")

SN1_1_18S_mod_df = _read_modification_quantification("SN1_1_18S", "18S SN1")
SN1_2_18S_mod_df = _read_modification_quantification("SN1_2_18S", "18S SN1")
SN1_3_18S_mod_df = _read_modification_quantification("SN1_3_18S", "18S SN1")

IVT_mod_df = _read_modification_quantification("IVT_18S", "IVT")

# Frames handed to the box-plot function, in plotting order.
array_of_modification_dfs = [
    SN2_1_47S_mod_df,
    SN2_2_47S_mod_df,
    SN2_3_47S_mod_df,
    SN2_1_45S_mod_df,
    SN2_2_45S_mod_df,
    SN2_3_45S_mod_df,
    SN2_1_30S_1_mod_df,
    SN2_2_30S_1_mod_df,
    SN2_3_30S_1_mod_df,
    SN2_1_30S_mod_df,
    SN2_2_30S_mod_df,
    SN2_3_30S_mod_df,
    SN2_1_26S_mod_df,
    SN2_2_26S_mod_df,
    SN2_3_26S_mod_df,
    SN2_1_21S_mod_df,
    SN2_2_21S_mod_df,
    SN2_3_21S_mod_df,
    SN2_1_21S_C_mod_df,
    SN2_2_21S_C_mod_df,
    SN2_3_21S_C_mod_df,
    SN2_1_18S_E_mod_df,
    SN2_2_18S_E_mod_df,
    SN2_3_18S_E_mod_df,
    SN2_1_18S_mod_df,
    SN2_2_18S_mod_df,
    SN2_3_18S_mod_df,
    SN1_1_18S_E_mod_df,
    SN1_2_18S_E_mod_df,
    SN1_3_18S_E_mod_df,
    SN1_1_18S_mod_df,
    SN1_2_18S_mod_df,
    SN1_3_18S_mod_df,
    IVT_mod_df,
]



def boxplot_modification_frequencies_per_fragment(array_of_modification_dfs,literature_mod_path, metric:str="rel_n_C_and_PseU"):
    """Box-plot ``metric`` per condition at the literature pseudouridine sites.

    The literature table is read as a header-less, tab-separated file with seven
    columns; only rows whose ``modification`` equals ``"psu"`` are used. All
    frames in ``array_of_modification_dfs`` are concatenated, and the rows whose
    ``position`` equals the ``end`` coordinate of a psu site are drawn in two
    stacked panels: sites strictly between 3670 and 4518 on top, sites strictly
    between 4518 and 5347 below.

    NOTE(review): a function of the same name that additionally returns the
    plotted rows is defined in an earlier cell; this definition replaces it
    when the cell is run.

    Args:
        array_of_modification_dfs: Iterable of DataFrames providing the columns
            ``"position"``, ``"Condition"`` and ``metric``.
        literature_mod_path: Path of the tab-separated literature modification table.
        metric: Column plotted on the y axis.

    Returns:
        The matplotlib figure holding both panels.
    """
    # NOTE(review): the window limits look like reference coordinates that split
    # one rRNA region in two - confirm. A site at exactly 4518 is in neither panel.
    first_window = (3670, 4518)
    second_window = (4518, 5347)

    literature_mod_df = pd.read_csv(literature_mod_path, sep="\t", header=None, index_col=None) #"~/wf-nanoribolyzer/references/rRNA_modifications_conv.bed"
    literature_mod_df.columns = ["reference","start","end","modification","A","B","C"]
    psu_ends = literature_mod_df.loc[literature_mod_df["modification"] == "psu", "end"]

    # Concatenate once instead of growing a frame inside a loop.
    modification_dfs = list(array_of_modification_dfs)
    concat_modifications_df = pd.concat(modification_dfs, axis=0) if modification_dfs else pd.DataFrame()

    def _rows_at_psu_sites(lower, upper):
        # Rows located on a psu site with lower < position < upper, grouped in
        # the order in which the sites appear in the literature table.
        per_site_frames = [
            concat_modifications_df[concat_modifications_df["position"].isin([end])]
            for end in psu_ends
            if lower < end < upper
        ]
        if not per_site_frames:
            # Keep the column layout so the plotting call sees the expected names.
            return concat_modifications_df.iloc[0:0]
        return pd.concat(per_site_frames, axis=0)

    def _draw_panel(ax, panel_df):
        # One box per position and condition; legend pushed past the right edge.
        sns.boxplot(data=panel_df, x="position", y=metric, hue="Condition", ax=ax)
        ax.legend(loc="upper right", bbox_to_anchor=[1.1, 1])

    def _draw_vertical_guides(ax):
        # Opaque dashed lines every 1 x-unit and faint ones every 0.1 x-unit,
        # both starting at the left axis limit; x-ticks are left untouched.
        xmin, xmax = ax.get_xlim()
        for x in np.arange(xmin, xmax, 1):
            ax.axvline(x=x, color='gray', linestyle='--', alpha=1)
        for x in np.arange(xmin, xmax, 0.1):
            ax.axvline(x=x, color='gray', linestyle='--', alpha=0.1)

    fig, axs = plt.subplots(ncols=1, nrows=2, figsize=(20, 15))

    first_panel_df = _rows_at_psu_sites(*first_window)
    print(first_panel_df.columns)
    _draw_panel(axs[0], first_panel_df)

    second_panel_df = _rows_at_psu_sites(*second_window)
    _draw_panel(axs[1], second_panel_df)

    fig.tight_layout()
    for ax in axs:
        _draw_vertical_guides(ax)
    return fig

   
    
    
# Box plots for the function's default metric column.
fig = boxplot_modification_frequencies_per_fragment(
    array_of_modification_dfs,
    literature_mod_path="~/wf-nanoribolyzer/references/rRNA_modifications_conv.bed",
)
# plt.savefig("./18S_E_SN2_dRNA_modification_per_fragment_boxplots.svg")

# Same plot for the "rel_n_pseU" metric column.
fig2 = boxplot_modification_frequencies_per_fragment(
    array_of_modification_dfs,
    literature_mod_path="~/wf-nanoribolyzer/references/rRNA_modifications_conv.bed",
    metric="rel_n_pseU",
)
# plt.savefig("./18S_E_SN2_dRNA_modification_PseU_only_per_fragment_boxplots.svg")

In [None]:
def _read_full_length_intensity(intensity_matrix_path, max_start=600, min_end=12500):
    """Read an intensity matrix and pick the rows spanning the given window.

    Args:
        intensity_matrix_path: ``;``-separated CSV with at least the columns
            ``n_reads``, ``start`` and ``end``.
        max_start: Largest accepted ``start`` value.
        min_end: Smallest accepted ``end`` value.

    Returns:
        ``(intensity_df, full_length_df)``: the whole table sorted by ``n_reads``
        (descending) and the subset with ``start <= max_start`` and
        ``end >= min_end``.
    """
    intensity_df = pd.read_csv(intensity_matrix_path, sep=";", header=0).sort_values(by="n_reads", ascending=False)
    spans_window = (intensity_df["start"] <= max_start) & (intensity_df["end"] >= min_end)
    return intensity_df, intensity_df[spans_window]


def _read_full_length_hdbscan_fragments(fragment_df_path, max_start=600, min_end=12500):
    """Read an hdbscan fragment table and keep the clusters spanning the given window.

    Keeps the columns ID, Refstart, Refend, Length, n_Reads, rel_n_Reads and IDS,
    filters on ``Refstart <= max_start`` and ``Refend >= min_end``, sorts by
    ``rel_n_Reads`` (descending) and renames the first three columns to
    ``Reference``, ``Start`` and ``End``.
    """
    fragment_df = pd.read_csv(fragment_df_path, sep=";", header=0)
    fragment_df = fragment_df[["ID","Refstart","Refend","Length","n_Reads","rel_n_Reads","IDS"]]
    spans_window = (fragment_df["Refstart"] <= max_start) & (fragment_df["Refend"] >= min_end)
    # sort_values returns a new frame, so nothing is modified on a filtered view.
    fragment_df = fragment_df[spans_window].sort_values(by="rel_n_Reads", ascending=False)
    fragment_df.columns = ["Reference","Start","End","Length","n_Reads","rel_n_Reads","IDS"]
    return fragment_df


def _collect_template_fragments(fragment_df, templates):
    """Return the rows matching each ``(start, end, name)`` template exactly.

    Every matching row gets the template name in a new ``Fragment`` column; the
    matches are stacked in the order of ``templates``.
    """
    matches = [
        fragment_df[(fragment_df["Start"] == start) & (fragment_df["End"] == end)].assign(Fragment=fragment)
        for start, end, fragment in templates
    ]
    return pd.concat(matches, axis=0) if matches else pd.DataFrame()


# ---- SN3_1 ----
SN3_1_intensity_df, SN3_1_intensity_df_47S = _read_full_length_intensity(
    "~/directRNA_004/20250218_SN3_1/intensity_matrix/intensity_matrix.csv"
)

# Author's notes on the intensity-matrix clusters:
#434 12989 SN3_1 45S
#9 12989 + 10 12989 SN3_1 47S_01
#9 13351 SN3_1 47S
SN3_1_hdbscan_df = _read_full_length_hdbscan_fragments(
    "~/directRNA_004/20250218_SN3_1/fragment_analysis_hdbscan/fragment_df.csv"
)
# print(SN3_1_hdbscan_df)

# (start, end, fragment name) of the hdbscan clusters chosen for SN3_1.
list_of_templates_SN3_1_hdbscan = [(437,12988,"45S"),(10,12988,"47S_02"),(10,13350,"47S"),(439,13333,"47S_01")]
Intermediates_of_47S_hdbscan_SN3_1_df = _collect_template_fragments(SN3_1_hdbscan_df, list_of_templates_SN3_1_hdbscan)

print(Intermediates_of_47S_hdbscan_SN3_1_df)

Intermediates_of_47S_hdbscan_SN3_1_df.to_csv("SN3_1_hdbscan_47S_intermediates.csv",sep=";")


# ---- SN3_3 ----
SN3_3_intensity_df, SN3_3_intensity_df_47S = _read_full_length_intensity(
    "~/directRNA_004/20250218_SN3_3/intensity_matrix/intensity_matrix.csv"
)

# print(SN3_3_intensity_df_47S.head(40))

# Author's notes on the intensity-matrix clusters:
# 9 13351 + 10 13351 SN3_3 47S
# 434 12989 45S
# 9 12989 + 10 12989 SN3_3 47S_01

# 2270    442  13351        2  ['ffcc4eb4-d903-47dc-a874-22a324483623', 'a8cd...
# 1821    440  13351        2  ['ab916bfe-9f9d-43e1-a764-e9645423f0d1', '2ae2...
# 989     434  13337        1           ['1eddc994-6efe-428c-bc7a-cb1bf7494c45']
# 988     434  13323        1           ['e2d11d4f-fc6d-404d-ac38-e972fe0e69ec']
# 1401    437  13337        1           ['de1b46f6-b56d-4654-80d0-21c99dbd35e6']

SN3_3_hdbscan_df = _read_full_length_hdbscan_fragments(
    "~/directRNA_004/20250218_SN3_3/fragment_analysis_hdbscan/fragment_df.csv"
)

# print(SN3_3_hdbscan_df)
# Author's notes on the hdbscan clusters:
#10 12988 37r
#437 12988 813r
#441 13334 20r
#11 13350 12r

# (start, end, fragment name) of the hdbscan clusters chosen for SN3_3.
list_of_templates_SN3_3_hdbscan = [(437,12988,"45S"),(10,12988,"47S_02"),(11,13350,"47S"),(441,13334,"47S_01")]
Intermediates_of_47S_hdbscan_SN3_3_df = _collect_template_fragments(SN3_3_hdbscan_df, list_of_templates_SN3_3_hdbscan)

print(Intermediates_of_47S_hdbscan_SN3_3_df)

Intermediates_of_47S_hdbscan_SN3_3_df.to_csv("SN3_3_hdbscan_47S_intermediates.csv",sep=";")


In [None]:
fig,ax = plt.subplots(nrows=1,ncols=1,figsize=(20,10))
plt.xticks(rotation = 90)

# One call per (fragment, sample) pair, drawn on the shared axes.
# The marker encodes the fragment, the colour encodes the sample.
hdbscan_fragment_markers = [("47S", "o"), ("47S_01", "s"), ("47S_02", "*"), ("45S", "P")]
hdbscan_sample_colors = [("SN3_1", "green"), ("SN3_3", "violet")]

for fragment_name, fragment_marker in hdbscan_fragment_markers:
    for sample_name, sample_color in hdbscan_sample_colors:
        view_modification_per_fragment(
            bamfile_path=f"~/directRNA_004/20250218_{sample_name}/filtered_pod5/filtered_pod5_basecalled.bam",
            fragment_df_path=f"{sample_name}_hdbscan_47S_intermediates.csv",
            fragment_to_analyse=fragment_name,
            color=sample_color,
            markerstyle=fragment_marker,
            reference_path="~/wf-nanoribolyzer/references/RNA45SN1.fasta",
            literature_mod_df_path="~/wf-nanoribolyzer/references/rRNA_modifications_conv.bed",
            condition=f"{sample_name}_{fragment_name}_hdbscan",
            fig=fig,
            ax=ax,
            # Every (sample, fragment) pair gets its own output file. The
            # SN3_3 / 45S call used to reuse the SN3_1 file name, which
            # overwrote the SN3_1 table and never produced the SN3_3 one.
            modification_csv_path=f"{sample_name}_{fragment_name}_hdbscan_modification_quantification.csv",
        )

ax.set_xlabel("PseU position on 45S")
ax.set_ylabel("Modification frequency")
ax.set_ylim(0,1.1)

# Guide lines at every tick so points can be read off against both axes.
for tick in ax.get_xticks():
    ax.axvline(tick, color='black', linestyle='--', linewidth=1, alpha=.5)

for tick in ax.get_yticks():
    ax.axhline(tick, color='gray', linestyle='--', linewidth=1, alpha=.2)

# Single legend, placed outside the plotting area on the right-hand side.
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))

In [9]:
!pip install seaborn
import seaborn as sns

SN3_1_47S_hdbscan_mod_df = pd.read_csv("./SN3_1_47S_hdbscan_modification_quantification.csv", sep = ";",header = 0)
SN3_1_47S_hdbscan_mod_df["Condition"] = "47S SN3 hdbscan"
SN3_3_47S_hdbscan_mod_df = pd.read_csv("./SN3_3_47S_hdbscan_modification_quantification.csv", sep = ";",header = 0)
SN3_3_47S_hdbscan_mod_df["Condition"] = "47S SN3 hdbscan"

SN3_1_47S_01_hdbscan_mod_df = pd.read_csv("./SN3_1_47S_01_hdbscan_modification_quantification.csv", sep = ";",header = 0)
SN3_1_47S_01_hdbscan_mod_df["Condition"] = "47S-01 SN3 hdbscan"
SN3_3_47S_01_hdbscan_mod_df = pd.read_csv("./SN3_3_47S_01_hdbscan_modification_quantification.csv", sep = ";",header = 0)
SN3_3_47S_01_hdbscan_mod_df["Condition"] = "47S-01 SN3 hdbscan"

SN3_1_47S_02_hdbscan_mod_df = pd.read_csv("./SN3_1_47S_02_hdbscan_modification_quantification.csv", sep = ";",header = 0)
SN3_1_47S_02_hdbscan_mod_df["Condition"] = "47S-02 SN3 hdbscan"
SN3_3_47S_02_hdbscan_mod_df = pd.read_csv("./SN3_3_47S_02_hdbscan_modification_quantification.csv", sep = ";",header = 0)
SN3_3_47S_02_hdbscan_mod_df["Condition"] = "47S-02 SN3 hdbscan"



SN3_1_45S_hdbscan_mod_df = pd.read_csv("./SN3_1_45S_modification_quantification.csv", sep = ";",header = 0)
SN3_1_45S_hdbscan_mod_df["Condition"] = "45S SN3 hdbscan"
SN3_3_45S_hdbscan_mod_df = pd.read_csv("./SN3_3_45S_modification_quantification.csv", sep = ";",header = 0)
SN3_3_45S_hdbscan_mod_df["Condition"] = "45S SN3 hdbscan"

IVT_mod_df = pd.read_csv("./IVT_18S_modification_quantification.csv", sep = ";",header = 0)
IVT_mod_df["Condition"] = "IVT"

array_of_modification_dfs = [
                            SN3_1_47S_hdbscan_mod_df,
                            SN3_3_47S_hdbscan_mod_df,
                            SN3_1_47S_01_hdbscan_mod_df,
                            SN3_3_47S_01_hdbscan_mod_df,
                            SN3_1_47S_02_hdbscan_mod_df,
                            SN3_3_47S_02_hdbscan_mod_df,
                            SN3_1_45S_hdbscan_mod_df,
                            SN3_3_45S_hdbscan_mod_df,
                            IVT_mod_df
                            ]

print(array_of_modification_dfs)
   
    
#fig = boxplot_modification_frequencies_per_fragment(array_of_modification_dfs,"~/wf-nanoribolyzer/references/rRNA_modifications_conv.bed")
# plt.savefig("SN3_hdbscan_47S_intermediates_dRNA_modification_per_fragment_boxplots.svg")
#fig2 = boxplot_modification_frequencies_per_fragment(array_of_modification_dfs,"~/wf-nanoribolyzer/references/rRNA_modifications_conv.bed","rel_n_pseU")
# plt.savefig("SN3_hdbscan_47S_intermediates_dRNA_modification_PseU_only_per_fragment_boxplots.svg")

In [2]:
from pathlib import Path
import numpy as np
import pysam
from tqdm import tqdm
from itertools import repeat
import polars as pl
import matplotlib.pyplot as plt
import pandas as pd
import plotly.graph_objects as go
import plotly
import plotly.io as pio
import argparse
from random import seed,uniform
import gc 
import ctypes


# Free unreachable Python objects first, then call malloc_trim(0) from the
# C library so the allocator can hand unused heap memory back to the system.
gc.collect()

libc = ctypes.cdll.LoadLibrary("libc.so.6")  # requires a libc.so.6 on the loader path
libc.malloc_trim(0)

def _append_read_columns(per_read_df, per_read_counts):
    """Return ``per_read_df`` extended horizontally by one column per dict entry."""
    return pl.concat([per_read_df, pl.from_dict(per_read_counts)], how="horizontal")


def _trim_process_memory():
    """Run the garbage collector, then call ``malloc_trim(0)`` from libc.so.6."""
    gc.collect()
    libc = ctypes.CDLL("libc.so.6") # clearing cache
    libc.malloc_trim(0)


def view_modification_per_read(bamfile_path:str, fragment_df_path:str,fragment_to_analyse: str,color:str,markerstyle:str, reference_path:str, literature_mod_df_path:str, condition:str, fig, ax, modification_csv_path=""):
    """Write per-read pseudouridine call tables for the reads of one fragment.

    The row of the fragment table whose "Fragment" equals
    ``fragment_to_analyse`` supplies the read names (its "IDS" cell). For
    every non-supplementary BAM record with one of these names, two count
    vectors over the reference positions are built:

    * positions carrying a ('T', 0, 17802) modification call with
      probability >= 0.95, and
    * the same counts plus 1 wherever the read shows "C" at a reference "T".

    Both tables (one column per read, one row per reference position) are
    written, semicolon-separated, to ``./modifications_per_read_{condition}.csv``
    and ``./modifications_per_read_C_U_{condition}.csv``.

    Parameters
    ----------
    bamfile_path : str
        BAM file with modification tags; a leading "~" is expanded.
    fragment_df_path : str
        Semicolon-separated fragment table with the columns Reference, Start,
        End, Fragment, Length, IDS, n_Reads and rel_n_Reads.
    fragment_to_analyse : str
        Value of the "Fragment" column selecting the row to use.
    reference_path : str
        FASTA file; only its first sequence is used.
    condition : str
        Label used in the output file names. For "IVT_18S" at most 20000
        reads are processed.
    color, markerstyle, literature_mod_df_path, fig, ax, modification_csv_path
        Accepted for call compatibility; not used by this function.

    Raises
    ------
    IndexError
        If no row of the fragment table matches ``fragment_to_analyse``.
    """
    import ast
    import os

    # The per-chunk dictionaries have always been module-level globals; they
    # stay global so code that inspects them after a run keeps working.
    global read_modification_dict
    global read_modification_C_U_dict

    pseu_key = ('T', 0, 17802)      # key into AlignedSegment.modified_bases
    min_call_probability = 0.95     # calls below this probability are ignored
    reads_per_flush = 10000         # move per-read arrays into polars every N reads
    ivt_read_limit = 20000          # read cap applied to the "IVT_18S" condition only

    fragment_df = pl.read_csv(os.path.expanduser(fragment_df_path), separator=";",has_header=True,columns=["Reference","Start","End","Fragment","Length","IDS","n_Reads","rel_n_Reads"])
    fragment_df = fragment_df.to_pandas()
    fragment_df = fragment_df[fragment_df["Fragment"] == fragment_to_analyse]
    if fragment_df.empty:
        raise IndexError(f"fragment {fragment_to_analyse!r} not found in {fragment_df_path}")
    # The IDS cell is the text form of a Python list of read names. It used to
    # be passed to eval(); literal_eval parses the same literal without
    # executing arbitrary code taken from the CSV.
    ids_of_interest = set(ast.literal_eval(fragment_df["IDS"].to_list()[0]))

    # pysam does not expand "~" itself, hence the explicit expanduser().
    with pysam.FastaFile(os.path.expanduser(reference_path)) as fasta_file:
        reference_sequence = str(fasta_file.fetch(fasta_file.references[0]))
    reference_length = len(reference_sequence)

    counter = 0
    read_modification_df = pl.DataFrame()
    read_modification_C_U_df = pl.DataFrame()
    read_modification_dict = {}
    read_modification_C_U_dict = {}

    with pysam.AlignmentFile(os.path.expanduser(bamfile_path), mode="rb") as bamfile:
        for read in tqdm(bamfile.fetch(until_eof=True)):
            if read.is_supplementary or read.query_name not in ids_of_interest:
                continue
            if condition == "IVT_18S" and counter >= ivt_read_limit:
                break
            counter += 1

            pseu_calls = np.zeros(reference_length, dtype=np.int64)
            c_for_u_calls = np.zeros(reference_length, dtype=np.int64)

            modified_bases = read.modified_bases
            if modified_bases is not None:
                pseu_records = modified_bases.get(pseu_key)
                try:
                    # Fetched once per read rather than once per aligned pair.
                    # NOTE(review): get_aligned_pairs() indexes the aligned
                    # query sequence whereas get_forward_sequence() returns
                    # the read as sequenced; the two coincide only for
                    # forward-strand alignments — confirm no reverse-strand
                    # reads are expected here.
                    read_sequence = read.get_forward_sequence()
                    has_sequence = read_sequence is not None  # None: record stores no bases

                    query_to_reference = {}
                    c_for_u_positions = []
                    for pair in read.get_aligned_pairs(with_seq=True):
                        if None in pair:
                            # insertion, deletion or clipped base: no 1:1 mapping
                            continue
                        query_index, reference_index = pair[0], pair[1]
                        query_to_reference[query_index] = reference_index
                        if (has_sequence
                                and read_sequence[query_index] == "C"
                                and reference_sequence[reference_index] == "T"):
                            c_for_u_positions.append(reference_index)

                    if pseu_records is not None:
                        for pseu_record in pseu_records:
                            # the stored quality is mapped to a probability in (0, 1]
                            p = ((pseu_record[1] + 1)/256)
                            if p >= min_call_probability and pseu_record[0] in query_to_reference:
                                pseu_calls[query_to_reference[pseu_record[0]]] += 1

                    # Applied after the modification calls, as before, so an
                    # error raised above leaves the C counts untouched.
                    for reference_index in c_for_u_positions:
                        c_for_u_calls[reference_index] += 1
                except TypeError:
                    print("A type error occured")

            read_modification_dict[read.query_name] = pseu_calls
            read_modification_C_U_dict[read.query_name] = pseu_calls + c_for_u_calls

            # Periodically move the accumulated arrays into the polars frames
            # and release the Python-side copies to bound memory use.
            if len(read_modification_dict) >= reads_per_flush:
                read_modification_df = _append_read_columns(read_modification_df, read_modification_dict)
                read_modification_dict = {}
                _trim_process_memory()
                print(read_modification_df.shape)
            if len(read_modification_C_U_dict) >= reads_per_flush:
                read_modification_C_U_df = _append_read_columns(read_modification_C_U_df, read_modification_C_U_dict)
                read_modification_C_U_dict = {}
                _trim_process_memory()
                print(read_modification_C_U_df.shape)

    # Append whatever is left over from the last, partially filled chunk.
    read_modification_df = _append_read_columns(read_modification_df, read_modification_dict)
    read_modification_C_U_df = _append_read_columns(read_modification_C_U_df, read_modification_C_U_dict)

    read_modification_df.write_csv(f"./modifications_per_read_{condition}.csv",separator=";")
    read_modification_C_U_df.write_csv(f"./modifications_per_read_C_U_{condition}.csv",separator=";")

    #return fig
    

In [14]:
fig,ax = plt.subplots(nrows=1,ncols=1,figsize=(20,10))
plt.xticks(rotation = 90)

#### IVT ####

view_modification_per_read(bamfile_path="~/directRNA_004/IVT_18S/filtered_pod5/filtered_pod5_basecalled.bam",
                   fragment_df_path="~/directRNA_004/IVT_18S/template_based_analysis/template_fragment_df.csv",
                   fragment_to_analyse="18S",
                   color="red",
                   markerstyle = "*",
                   reference_path="~/wf-nanoribolyzer/references/RNA45SN1.fasta",
                   literature_mod_df_path="~/wf-nanoribolyzer/references/rRNA_modifications_conv.bed",
                   condition = "IVT_18S",
                   fig = fig,
                   ax = ax,
                   modification_csv_path=""
                 )
### 47S ####


view_modification_per_read(bamfile_path="~/directRNA_004/20250218_SN1_1/filtered_pod5/filtered_pod5_basecalled.bam",
                   fragment_df_path="~/directRNA_004/20250218_SN1_1/template_based_analysis/template_fragment_df.csv",
                   fragment_to_analyse="47S",
                   color="palevioletred",
                   markerstyle = "*",
                   reference_path="~/wf-nanoribolyzer/references/RNA45SN1.fasta",
                   literature_mod_df_path="~/wf-nanoribolyzer/references/rRNA_modifications_conv.bed",
                   condition = "SN1_1_47S",
                   fig = fig,
                   ax = ax,
                   modification_csv_path=""
                   )

view_modification_per_read(bamfile_path="~/directRNA_004/20250218_SN1_3/filtered_pod5/filtered_pod5_basecalled.bam",
                   fragment_df_path="~/directRNA_004/20250218_SN1_3/template_based_analysis/template_fragment_df.csv",
                   fragment_to_analyse="47S",
                   color="blue",
                   markerstyle = "*",
                   reference_path="~/wf-nanoribolyzer/references/RNA45SN1.fasta",
                   literature_mod_df_path="~/wf-nanoribolyzer/references/rRNA_modifications_conv.bed",
                   condition = "SN1_3_47S",
                   fig = fig,
                   ax = ax,
                   modification_csv_path=""
                   )





view_modification_per_read(bamfile_path="~/directRNA_004/20250319_SN2_1/filtered_pod5/filtered_pod5_basecalled.bam",
                   fragment_df_path="~/directRNA_004/20250319_SN2_1/template_based_analysis/template_fragment_df.csv",
                   fragment_to_analyse="47S",
                   color="green",
                   markerstyle = "*",
                   reference_path="~/wf-nanoribolyzer/references/RNA45SN1.fasta",
                   literature_mod_df_path="~/wf-nanoribolyzer/references/rRNA_modifications_conv.bed",
                   condition = "SN2_1_47S",
                   fig = fig,
                   ax = ax,
                   modification_csv_path=""
                   )


view_modification_per_read(bamfile_path="~/directRNA_004/20250409_SN2_2/filtered_pod5/filtered_pod5_basecalled.bam",
                   fragment_df_path="~/directRNA_004/20250409_SN2_2/template_based_analysis/template_fragment_df.csv",
                   fragment_to_analyse="47S",
                   color="violet",
                   markerstyle = "*",
                   reference_path="~/wf-nanoribolyzer/references/RNA45SN1.fasta",
                   literature_mod_df_path="~/wf-nanoribolyzer/references/rRNA_modifications_conv.bed",
                   condition = "SN2_2_47S",
                   fig = fig,
                   ax = ax,
                   modification_csv_path=""
                   )

view_modification_per_read(bamfile_path="~/directRNA_004/20250409_SN2_3/filtered_pod5/filtered_pod5_basecalled.bam",
                   fragment_df_path="~/directRNA_004/20250409_SN2_3/template_based_analysis/template_fragment_df.csv",
                   fragment_to_analyse="47S",
                   color="violet",
                   markerstyle = "*",
                   reference_path="~/wf-nanoribolyzer/references/RNA45SN1.fasta",
                   literature_mod_df_path="~/wf-nanoribolyzer/references/rRNA_modifications_conv.bed",
                   condition = "SN2_3_47S",
                   fig = fig,
                   ax = ax,
                   modification_csv_path=""
                   )








view_modification_per_read(bamfile_path="~/directRNA_004/20250218_SN3_1/filtered_pod5/filtered_pod5_basecalled.bam",
                   fragment_df_path="~/directRNA_004/20250218_SN3_1/template_based_analysis/template_fragment_df.csv",
                   fragment_to_analyse="47S",
                   color="green",
                   markerstyle = "*",
                   reference_path="~/wf-nanoribolyzer/references/RNA45SN1.fasta",
                   literature_mod_df_path="~/wf-nanoribolyzer/references/rRNA_modifications_conv.bed",
                   condition = "SN3_1_47S",
                   fig = fig,
                   ax = ax,
                   modification_csv_path=""
                   )


view_modification_per_read(bamfile_path="~/directRNA_004/20250409_SN3_2/filtered_pod5/filtered_pod5_basecalled.bam",
                   fragment_df_path="~/directRNA_004/20250409_SN3_2/template_based_analysis/template_fragment_df.csv",
                   fragment_to_analyse="47S",
                   color="violet",
                   markerstyle = "*",
                   reference_path="~/wf-nanoribolyzer/references/RNA45SN1.fasta",
                   literature_mod_df_path="~/wf-nanoribolyzer/references/rRNA_modifications_conv.bed",
                   condition = "SN3_2_47S",
                   fig = fig,
                   ax = ax,
                   modification_csv_path=""
                   )

view_modification_per_read(bamfile_path="~/directRNA_004/20250218_SN3_3/filtered_pod5/filtered_pod5_basecalled.bam",
                   fragment_df_path="~/directRNA_004/20250218_SN3_3/template_based_analysis/template_fragment_df.csv",
                   fragment_to_analyse="47S",
                   color="violet",
                   markerstyle = "*",
                   reference_path="~/wf-nanoribolyzer/references/RNA45SN1.fasta",
                   literature_mod_df_path="~/wf-nanoribolyzer/references/rRNA_modifications_conv.bed",
                   condition = "SN3_3_47S",
                   fig = fig,
                   ax = ax,
                   modification_csv_path=""
                   )


### 45S ####

view_modification_per_read(bamfile_path="~/directRNA_004/20250218_SN1_1/filtered_pod5/filtered_pod5_basecalled.bam",
                   fragment_df_path="~/directRNA_004/20250218_SN1_1/template_based_analysis/template_fragment_df.csv",
                   fragment_to_analyse="45S",
                   color="palevioletred",
                   markerstyle = "v",
                   reference_path="~/wf-nanoribolyzer/references/RNA45SN1.fasta",
                   literature_mod_df_path="~/wf-nanoribolyzer/references/rRNA_modifications_conv.bed",
                   condition = "SN1_1_45S",
                   fig = fig,
                   ax = ax,
                   modification_csv_path=""
                   )

view_modification_per_read(bamfile_path="~/directRNA_004/20250218_SN1_3/filtered_pod5/filtered_pod5_basecalled.bam",
                   fragment_df_path="~/directRNA_004/20250218_SN1_3/template_based_analysis/template_fragment_df.csv",
                   fragment_to_analyse="45S",
                   color="blue",
                   markerstyle = "v",
                   reference_path="~/wf-nanoribolyzer/references/RNA45SN1.fasta",
                   literature_mod_df_path="~/wf-nanoribolyzer/references/rRNA_modifications_conv.bed",
                   condition = "SN1_3_45S",
                   fig = fig,
                   ax = ax,
                   modification_csv_path=""
                   )






#### 43S ####

view_modification_per_read(bamfile_path="~/directRNA_004/20250218_SN3_1/filtered_pod5/filtered_pod5_basecalled.bam",
                   fragment_df_path="~/directRNA_004/20250218_SN3_1/template_based_analysis/template_fragment_df.csv",
                   fragment_to_analyse="43S",
                   color="green",
                   markerstyle = "*",
                   reference_path="~/wf-nanoribolyzer/references/RNA45SN1.fasta",
                   literature_mod_df_path="~/wf-nanoribolyzer/references/rRNA_modifications_conv.bed",
                   condition = "SN3_1_43S",
                   fig = fig,
                   ax = ax,
                   modification_csv_path=""
                   )


view_modification_per_read(bamfile_path="~/directRNA_004/20250409_SN3_2/filtered_pod5/filtered_pod5_basecalled.bam",
                   fragment_df_path="~/directRNA_004/20250409_SN3_2/template_based_analysis/template_fragment_df.csv",
                   fragment_to_analyse="43S",
                   color="violet",
                   markerstyle = "*",
                   reference_path="~/wf-nanoribolyzer/references/RNA45SN1.fasta",
                   literature_mod_df_path="~/wf-nanoribolyzer/references/rRNA_modifications_conv.bed",
                   condition = "SN3_2_43S",
                   fig = fig,
                   ax = ax,
                   modification_csv_path=""
                   )

view_modification_per_read(bamfile_path="~/directRNA_004/20250218_SN3_3/filtered_pod5/filtered_pod5_basecalled.bam",
                   fragment_df_path="~/directRNA_004/20250218_SN3_3/template_based_analysis/template_fragment_df.csv",
                   fragment_to_analyse="43S",
                   color="violet",
                   markerstyle = "*",
                   reference_path="~/wf-nanoribolyzer/references/RNA45SN1.fasta",
                   literature_mod_df_path="~/wf-nanoribolyzer/references/rRNA_modifications_conv.bed",
                   condition = "SN3_3_43S",
                   fig = fig,
                   ax = ax,
                   modification_csv_path=""
                   )




#### 41S ####


view_modification_per_read(bamfile_path="~/directRNA_004/20250218_SN3_1/filtered_pod5/filtered_pod5_basecalled.bam",
                   fragment_df_path="~/directRNA_004/20250218_SN3_1/template_based_analysis/template_fragment_df.csv",
                   fragment_to_analyse="41S",
                   color="green",
                   markerstyle = "*",
                   reference_path="~/wf-nanoribolyzer/references/RNA45SN1.fasta",
                   literature_mod_df_path="~/wf-nanoribolyzer/references/rRNA_modifications_conv.bed",
                   condition = "SN3_1_41S",
                   fig = fig,
                   ax = ax,
                   modification_csv_path=""
                   )


view_modification_per_read(bamfile_path="~/directRNA_004/20250409_SN3_2/filtered_pod5/filtered_pod5_basecalled.bam",
                   fragment_df_path="~/directRNA_004/20250409_SN3_2/template_based_analysis/template_fragment_df.csv",
                   fragment_to_analyse="41S",
                   color="violet",
                   markerstyle = "*",
                   reference_path="~/wf-nanoribolyzer/references/RNA45SN1.fasta",
                   literature_mod_df_path="~/wf-nanoribolyzer/references/rRNA_modifications_conv.bed",
                   condition = "SN3_2_41S",
                   fig = fig,
                   ax = ax,
                   modification_csv_path=""
                   )

view_modification_per_read(bamfile_path="~/directRNA_004/20250218_SN3_3/filtered_pod5/filtered_pod5_basecalled.bam",
                   fragment_df_path="~/directRNA_004/20250218_SN3_3/template_based_analysis/template_fragment_df.csv",
                   fragment_to_analyse="41S",
                   color="violet",
                   markerstyle = "*",
                   reference_path="~/wf-nanoribolyzer/references/RNA45SN1.fasta",
                   literature_mod_df_path="~/wf-nanoribolyzer/references/rRNA_modifications_conv.bed",
                   condition = "SN3_3_41S",
                   fig = fig,
                   ax = ax,
                   modification_csv_path=""
                   )



view_modification_per_read(bamfile_path="~/directRNA_004/20250319_SN2_1/filtered_pod5/filtered_pod5_basecalled.bam",
                   fragment_df_path="~/directRNA_004/20250319_SN2_1/template_based_analysis/template_fragment_df.csv",
                   fragment_to_analyse="45S",
                   color="green",
                   markerstyle = "*",
                   reference_path="~/wf-nanoribolyzer/references/RNA45SN1.fasta",
                   literature_mod_df_path="~/wf-nanoribolyzer/references/rRNA_modifications_conv.bed",
                   condition = "SN2_1_45S",
                   fig = fig,
                   ax = ax,
                   modification_csv_path=""
                   )


view_modification_per_read(bamfile_path="~/directRNA_004/20250409_SN2_2/filtered_pod5/filtered_pod5_basecalled.bam",
                   fragment_df_path="~/directRNA_004/20250409_SN2_2/template_based_analysis/template_fragment_df.csv",
                   fragment_to_analyse="45S",
                   color="violet",
                   markerstyle = "*",
                   reference_path="~/wf-nanoribolyzer/references/RNA45SN1.fasta",
                   literature_mod_df_path="~/wf-nanoribolyzer/references/rRNA_modifications_conv.bed",
                   condition = "SN2_2_45S",
                   fig = fig,
                   ax = ax,
                   modification_csv_path=""
                   )

# 45S fragment, remaining samples (SN2_3 and the three SN3 replicates).
# Each row is (run date, sample id, color, marker style). The run directory
# is "<run date>_<sample id>" and the label passed as `condition` is
# "<sample id>_45S", so paths and labels cannot drift apart.
remaining_samples_45S = [
    ("20250409", "SN2_3", "violet", "*"),
    ("20250218", "SN3_1", "green", "v"),
    ("20250409", "SN3_2", "violet", "v"),
    ("20250218", "SN3_3", "violet", "v"),
]

# Calls are issued in table order, all drawing onto the shared fig/ax.
for _date, _sample, _color, _marker in remaining_samples_45S:
    view_modification_per_read(
        bamfile_path=f"~/directRNA_004/{_date}_{_sample}/filtered_pod5/filtered_pod5_basecalled.bam",
        fragment_df_path=f"~/directRNA_004/{_date}_{_sample}/template_based_analysis/template_fragment_df.csv",
        fragment_to_analyse="45S",
        color=_color,
        markerstyle=_marker,
        reference_path="~/wf-nanoribolyzer/references/RNA45SN1.fasta",
        literature_mod_df_path="~/wf-nanoribolyzer/references/rRNA_modifications_conv.bed",
        condition=f"{_sample}_45S",
        fig=fig,
        ax=ax,
        modification_csv_path="",
    )


#### 30S+1 ####

# 30S+1 fragment, all samples.
# Each row is (run date, sample id, color, marker style). The run directory
# is "<run date>_<sample id>" and the label passed as `condition` is
# "<sample id>_30S+1". The SN2 replicates use the "*" marker; SN1 and SN3
# use the fragment marker "s".
samples_30S_plus_1 = [
    ("20250218", "SN1_1", "palevioletred", "s"),
    ("20250218", "SN1_3", "blue", "s"),
    ("20250319", "SN2_1", "green", "*"),
    ("20250409", "SN2_2", "violet", "*"),
    ("20250409", "SN2_3", "violet", "*"),
    ("20250218", "SN3_1", "green", "s"),
    ("20250409", "SN3_2", "violet", "s"),
    ("20250218", "SN3_3", "violet", "s"),
]

# Calls are issued in table order, all drawing onto the shared fig/ax.
for _date, _sample, _color, _marker in samples_30S_plus_1:
    view_modification_per_read(
        bamfile_path=f"~/directRNA_004/{_date}_{_sample}/filtered_pod5/filtered_pod5_basecalled.bam",
        fragment_df_path=f"~/directRNA_004/{_date}_{_sample}/template_based_analysis/template_fragment_df.csv",
        fragment_to_analyse="30S+1",
        color=_color,
        markerstyle=_marker,
        reference_path="~/wf-nanoribolyzer/references/RNA45SN1.fasta",
        literature_mod_df_path="~/wf-nanoribolyzer/references/rRNA_modifications_conv.bed",
        condition=f"{_sample}_30S+1",
        fig=fig,
        ax=ax,
        modification_csv_path="",
    )


#### 30S ####

# 30S fragment, all samples.
# Each row is (run date, sample id, color, marker style). The run directory
# is "<run date>_<sample id>" and the label passed as `condition` is
# "<sample id>_30S". The SN2 replicates use the "*" marker; SN1 and SN3
# use the fragment marker "D".
samples_30S = [
    ("20250218", "SN1_1", "palevioletred", "D"),
    ("20250218", "SN1_3", "blue", "D"),
    ("20250319", "SN2_1", "green", "*"),
    ("20250409", "SN2_2", "violet", "*"),
    ("20250409", "SN2_3", "violet", "*"),
    ("20250218", "SN3_1", "green", "D"),
    ("20250409", "SN3_2", "violet", "D"),
    ("20250218", "SN3_3", "violet", "D"),
]

# Calls are issued in table order, all drawing onto the shared fig/ax.
for _date, _sample, _color, _marker in samples_30S:
    view_modification_per_read(
        bamfile_path=f"~/directRNA_004/{_date}_{_sample}/filtered_pod5/filtered_pod5_basecalled.bam",
        fragment_df_path=f"~/directRNA_004/{_date}_{_sample}/template_based_analysis/template_fragment_df.csv",
        fragment_to_analyse="30S",
        color=_color,
        markerstyle=_marker,
        reference_path="~/wf-nanoribolyzer/references/RNA45SN1.fasta",
        literature_mod_df_path="~/wf-nanoribolyzer/references/rRNA_modifications_conv.bed",
        condition=f"{_sample}_30S",
        fig=fig,
        ax=ax,
        modification_csv_path="",
    )


### 26S ####

# 26S fragment, all samples.
# Each row is (run date, sample id, color, marker style). The run directory
# is "<run date>_<sample id>" and the label passed as `condition` is
# "<sample id>_26S". The SN2 replicates use the "*" marker; SN1 and SN3
# use the fragment marker "2".
samples_26S = [
    ("20250218", "SN1_1", "palevioletred", "2"),
    ("20250218", "SN1_3", "blue", "2"),
    ("20250319", "SN2_1", "green", "*"),
    ("20250409", "SN2_2", "violet", "*"),
    ("20250409", "SN2_3", "violet", "*"),
    ("20250218", "SN3_1", "green", "2"),
    ("20250409", "SN3_2", "violet", "2"),
    ("20250218", "SN3_3", "violet", "2"),
]

# Calls are issued in table order, all drawing onto the shared fig/ax.
for _date, _sample, _color, _marker in samples_26S:
    view_modification_per_read(
        bamfile_path=f"~/directRNA_004/{_date}_{_sample}/filtered_pod5/filtered_pod5_basecalled.bam",
        fragment_df_path=f"~/directRNA_004/{_date}_{_sample}/template_based_analysis/template_fragment_df.csv",
        fragment_to_analyse="26S",
        color=_color,
        markerstyle=_marker,
        reference_path="~/wf-nanoribolyzer/references/RNA45SN1.fasta",
        literature_mod_df_path="~/wf-nanoribolyzer/references/rRNA_modifications_conv.bed",
        condition=f"{_sample}_26S",
        fig=fig,
        ax=ax,
        modification_csv_path="",
    )


#### 21S ####

# 21S fragment, all samples.
# Each row is (run date, sample id, color, marker style). The run directory
# is "<run date>_<sample id>" and the label passed as `condition` is
# "<sample id>_21S". The SN2 replicates use the "*" marker; SN1 and SN3
# use the fragment marker "X".
samples_21S = [
    ("20250218", "SN1_1", "palevioletred", "X"),
    ("20250218", "SN1_3", "blue", "X"),
    ("20250319", "SN2_1", "green", "*"),
    ("20250409", "SN2_2", "violet", "*"),
    ("20250409", "SN2_3", "violet", "*"),
    ("20250218", "SN3_1", "green", "X"),
    ("20250409", "SN3_2", "violet", "X"),
    ("20250218", "SN3_3", "violet", "X"),
]

# Calls are issued in table order, all drawing onto the shared fig/ax.
for _date, _sample, _color, _marker in samples_21S:
    view_modification_per_read(
        bamfile_path=f"~/directRNA_004/{_date}_{_sample}/filtered_pod5/filtered_pod5_basecalled.bam",
        fragment_df_path=f"~/directRNA_004/{_date}_{_sample}/template_based_analysis/template_fragment_df.csv",
        fragment_to_analyse="21S",
        color=_color,
        markerstyle=_marker,
        reference_path="~/wf-nanoribolyzer/references/RNA45SN1.fasta",
        literature_mod_df_path="~/wf-nanoribolyzer/references/rRNA_modifications_conv.bed",
        condition=f"{_sample}_21S",
        fig=fig,
        ax=ax,
        modification_csv_path="",
    )


### 21S-C ####

# 21S-C fragment, all samples.
# Each row is (run date, sample id, color, marker style). The run directory
# is "<run date>_<sample id>" and the label passed as `condition` is always
# derived as "<sample id>_21S-C", so a sample can never be labelled with a
# different fragment name than the one being analysed. The SN2 replicates
# use the "*" marker; SN1 and SN3 use the fragment marker "P".
samples_21S_C = [
    ("20250218", "SN1_1", "palevioletred", "P"),
    ("20250218", "SN1_3", "blue", "P"),
    ("20250319", "SN2_1", "green", "*"),
    ("20250409", "SN2_2", "violet", "*"),
    ("20250409", "SN2_3", "violet", "*"),
    ("20250218", "SN3_1", "green", "P"),
    ("20250409", "SN3_2", "violet", "P"),
    ("20250218", "SN3_3", "violet", "P"),
]

# Calls are issued in table order, all drawing onto the shared fig/ax.
for _date, _sample, _color, _marker in samples_21S_C:
    view_modification_per_read(
        bamfile_path=f"~/directRNA_004/{_date}_{_sample}/filtered_pod5/filtered_pod5_basecalled.bam",
        fragment_df_path=f"~/directRNA_004/{_date}_{_sample}/template_based_analysis/template_fragment_df.csv",
        fragment_to_analyse="21S-C",
        color=_color,
        markerstyle=_marker,
        reference_path="~/wf-nanoribolyzer/references/RNA45SN1.fasta",
        literature_mod_df_path="~/wf-nanoribolyzer/references/rRNA_modifications_conv.bed",
        condition=f"{_sample}_21S-C",
        fig=fig,
        ax=ax,
        modification_csv_path="",
    )


## 18S-E ####


# 18S-E fragment: SN1 and SN3 replicates only (no SN2 rows in this section).
# Each row is (run date, sample id, color, marker style). The run directory
# is "<run date>_<sample id>" and the label passed as `condition` is
# "<sample id>_18S-E". Every sample uses the "o" marker.
samples_18S_E = [
    ("20250218", "SN1_1", "palevioletred", "o"),
    ("20250409", "SN1_2", "blue", "o"),
    ("20250218", "SN1_3", "blue", "o"),
    ("20250218", "SN3_1", "green", "o"),
    ("20250409", "SN3_2", "violet", "o"),
    ("20250218", "SN3_3", "violet", "o"),
]

# Calls are issued in table order, all drawing onto the shared fig/ax.
for _date, _sample, _color, _marker in samples_18S_E:
    view_modification_per_read(
        bamfile_path=f"~/directRNA_004/{_date}_{_sample}/filtered_pod5/filtered_pod5_basecalled.bam",
        fragment_df_path=f"~/directRNA_004/{_date}_{_sample}/template_based_analysis/template_fragment_df.csv",
        fragment_to_analyse="18S-E",
        color=_color,
        markerstyle=_marker,
        reference_path="~/wf-nanoribolyzer/references/RNA45SN1.fasta",
        literature_mod_df_path="~/wf-nanoribolyzer/references/rRNA_modifications_conv.bed",
        condition=f"{_sample}_18S-E",
        fig=fig,
        ax=ax,
        modification_csv_path="",
    )





# ### 18S ####

# One call per run: (sequencing run directory, marker colour, marker style).
# The sample id (e.g. "SN2_1") is the part of the run directory after the date prefix.
for run_dir, marker_color, marker_style in (
    ("20250218_SN1_1", "palevioletred", "o"),
    ("20250409_SN1_2", "blue", "o"),
    ("20250218_SN1_3", "blue", "o"),
    ("20250319_SN2_1", "green", "*"),
    ("20250409_SN2_2", "violet", "*"),
    ("20250409_SN2_3", "violet", "*"),
    ("20250218_SN3_1", "green", "o"),
    ("20250409_SN3_2", "violet", "o"),
    ("20250218_SN3_3", "violet", "o"),
):
    sample_id = run_dir.split("_", 1)[1]
    view_modification_per_read(
        bamfile_path=f"~/directRNA_004/{run_dir}/filtered_pod5/filtered_pod5_basecalled.bam",
        fragment_df_path=f"~/directRNA_004/{run_dir}/template_based_analysis/template_fragment_df.csv",
        fragment_to_analyse="18S",
        color=marker_color,
        markerstyle=marker_style,
        reference_path="~/wf-nanoribolyzer/references/RNA45SN1.fasta",
        literature_mod_df_path="~/wf-nanoribolyzer/references/rRNA_modifications_conv.bed",
        condition=f"{sample_id}_18S",
        fig=fig,
        ax=ax,
        modification_csv_path="",
    )

In [3]:
import seaborn as sns
# Per-read modification tables of the SN3 runs (SN3_1, SN3_2, SN3_3), grouped by
# fragment. Every file is a ";"-separated CSV read from the working directory.

# 47S
SN3_1_47S_mod_df, SN3_2_47S_mod_df, SN3_3_47S_mod_df = (
    pl.read_csv(f"./modifications_per_read_SN3_{run}_47S.csv", separator=";")
    for run in (1, 2, 3)
)

# 45S
SN3_1_45S_mod_df, SN3_2_45S_mod_df, SN3_3_45S_mod_df = (
    pl.read_csv(f"./modifications_per_read_SN3_{run}_45S.csv", separator=";")
    for run in (1, 2, 3)
)

# 43S
SN3_1_43S_mod_df, SN3_2_43S_mod_df, SN3_3_43S_mod_df = (
    pl.read_csv(f"./modifications_per_read_SN3_{run}_43S.csv", separator=";")
    for run in (1, 2, 3)
)

# 41S
SN3_1_41S_mod_df, SN3_2_41S_mod_df, SN3_3_41S_mod_df = (
    pl.read_csv(f"./modifications_per_read_SN3_{run}_41S.csv", separator=";")
    for run in (1, 2, 3)
)

# 30S+1 (the "+" is dropped from the variable names)
SN3_1_30S_1_mod_df, SN3_2_30S_1_mod_df, SN3_3_30S_1_mod_df = (
    pl.read_csv(f"./modifications_per_read_SN3_{run}_30S+1.csv", separator=";")
    for run in (1, 2, 3)
)

# 30S
SN3_1_30S_mod_df, SN3_2_30S_mod_df, SN3_3_30S_mod_df = (
    pl.read_csv(f"./modifications_per_read_SN3_{run}_30S.csv", separator=";")
    for run in (1, 2, 3)
)

# 26S
SN3_1_26S_mod_df, SN3_2_26S_mod_df, SN3_3_26S_mod_df = (
    pl.read_csv(f"./modifications_per_read_SN3_{run}_26S.csv", separator=";")
    for run in (1, 2, 3)
)

# 21S
SN3_1_21S_mod_df, SN3_2_21S_mod_df, SN3_3_21S_mod_df = (
    pl.read_csv(f"./modifications_per_read_SN3_{run}_21S.csv", separator=";")
    for run in (1, 2, 3)
)

# 21S-C (the "-" becomes "_" in the variable names)
SN3_1_21S_C_mod_df, SN3_2_21S_C_mod_df, SN3_3_21S_C_mod_df = (
    pl.read_csv(f"./modifications_per_read_SN3_{run}_21S-C.csv", separator=";")
    for run in (1, 2, 3)
)



# 18S-E, SN3 runs.
# These variables previously loaded the SN1 CSVs ("..._SN1_<run>_18S-E.csv"), so the
# SN3 18S-E tables were silent duplicates of the SN1 ones. The run index is now the
# only varying part of the file name, and the sample is fixed to SN3.
SN3_1_18S_E_mod_df, SN3_2_18S_E_mod_df, SN3_3_18S_E_mod_df = (
    pl.read_csv(f"./modifications_per_read_SN3_{run}_18S-E.csv", separator=";")
    for run in (1, 2, 3)
)

# 18S, SN3 runs.
SN3_1_18S_mod_df, SN3_2_18S_mod_df, SN3_3_18S_mod_df = (
    pl.read_csv(f"./modifications_per_read_SN3_{run}_18S.csv", separator=";")
    for run in (1, 2, 3)
)

# Per-read modification tables of the SN1 runs (SN1_1, SN1_2, SN1_3); each file is a
# ";"-separated CSV read from the working directory.

# 18S-E
SN1_1_18S_E_mod_df, SN1_2_18S_E_mod_df, SN1_3_18S_E_mod_df = (
    pl.read_csv(f"./modifications_per_read_SN1_{run}_18S-E.csv", separator=";")
    for run in (1, 2, 3)
)

# 18S
SN1_1_18S_mod_df, SN1_2_18S_mod_df, SN1_3_18S_mod_df = (
    pl.read_csv(f"./modifications_per_read_SN1_{run}_18S.csv", separator=";")
    for run in (1, 2, 3)
)

#IVT_mod_df = pd.read_csv("./IVT_18S_modification_quantification.csv", sep = ";",header = 0)
#IVT_mod_df["Condition"] = "IVT"




In [None]:
def plot_correlation_matrix(df:pl.DataFrame, modification_reference_df_path:str, condition_name:str):
    """
    Plot and save the correlation matrix between annotated "psu" sites.

    The rows of ``df`` at index ``End - 1`` of every annotated "psu" entry are
    selected, the first column of ``df`` is skipped, and the pairwise correlation
    between those rows is drawn as a heatmap (colour scale fixed to [-1, 1]).
    Undefined correlations (NaN) are shown as 0. The correlation matrix is printed
    and the figure is written as SVG and PNG to ``~/Figure``.

    Parameters
    ----------
    df : pl.DataFrame
        Per-read modification table. Presumably one row per reference position and,
        after the first column, one column per read -- TODO confirm against the
        code that writes the CSV.
    modification_reference_df_path : str
        Tab-separated, header-less annotation file. After dropping columns that
        contain missing values, exactly four columns must remain
        (template, start, end, modification); otherwise assigning the column names
        raises.
    condition_name : str
        Label inserted into the output file names.
    """
    # The annotation file has no header line (the first cell of this notebook reads
    # the same file with header=None). Letting pandas infer a header would consume
    # the first annotated site as column names and silently drop it.
    modification_reference_df = pd.read_csv(
        modification_reference_df_path, sep="\t", header=None
    ).dropna(axis=1)
    modification_reference_df.columns = ["Template","Start","End","Modification"]
    psu_sites = modification_reference_df[modification_reference_df["Modification"] == "psu"]

    # "End" is used as a 1-based position; df rows are addressed 0-based.
    psu_rows = [end - 1 for end in psu_sites["End"]]
    site_df = df[psu_rows, 1:].to_pandas()
    site_df.index = psu_sites["End"]

    # corr() works column-wise, so transpose to get one column per site.
    df_corr = site_df.transpose().corr().fillna(0)
    print(df_corr)

    fig,ax = plt.subplots(nrows=1,ncols=1,figsize=(30,20))
    sns.heatmap(df_corr,vmin=-1,vmax=1,ax=ax,cmap="viridis")
    fig.show()

    # matplotlib does not expand "~"; resolve it explicitly and save through the
    # figure object so the output does not depend on pyplot's current figure.
    figure_dir = Path("~/Figure").expanduser()
    fig.savefig(figure_dir / f"single_correlation_matrix_{condition_name}.svg")
    fig.savefig(figure_dir / f"single_correlation_matrix_{condition_name}.png")
    

    

In [75]:
# Correlation matrices for the three SN3 47S tables, in run order.
for condition_label, condition_mod_df in (
    ("SN3_1_47S", SN3_1_47S_mod_df),
    ("SN3_2_47S", SN3_2_47S_mod_df),
    ("SN3_3_47S", SN3_3_47S_mod_df),
):
    plot_correlation_matrix(
        condition_mod_df,
        "~/wf-nanoribolyzer/references/rRNA_modifications_conv.bed",
        condition_label,
    )