In [None]:
from __future__ import annotations
import pandas as pd
import numpy as np
import os

#pip install jdgenometracks
import jdgenometracks as jdg
from jdgenometracks import TrackFactory


jdgenometracks can be installed via pip:

```bash
pip install jdgenometracks
```

## Prepare Data Input

You will need to have run the CRISPR_SURF demo first. In addition, you will need the Millipede outputs generated in Step 3, and the baseline and enriched editing-efficiency outputs generated in Step 4.

If you would like to plot motifs and phyloP scores, add their file paths as well. These inputs are optional: a motif or phyloP track is only drawn when its file exists.


In [None]:
# Input locations for the processing and plotting cells below.
# Every value is an empty placeholder: replace each "" with a path on your machine.

# Not referenced by any other cell shown in this notebook.
WORKING_DIR = ""

# --- ABE8e inputs ---
# Directory prefix for the SURF track files. The plotting cell appends file names
# (neg_raw_scores.bedgraph, rep1..3_raw_scores.bedgraph, deconvolved_scores.bedgraph,
# significant_regions.bed) directly to this string, so the trailing "/" must stay.
# NOTE(review): presumably the leading "" is meant to be replaced with the
# CRISPR-SURF output directory - confirm.
ABE8e_SURF_path = "" + "/tracks_for_plotting/"
# Millipede table, read with pd.read_csv(..., index_col=0). Its "Coefficient" and
# "PIP" columns are used, and the derived .bedgraph files are written next to it.
ABE8e_sigma_hit_path = ""
# Editing-efficiency tracks, drawn as lines (baseline in red, enriched in blue).
ABE8e_baseline_editing_efficiency_path = ""
ABE8e_enriched_editing_efficiency_path = ""
# Optional motif tracks: each is only plotted when os.path.exists(path) is true.
ABE8e_sig1_motifs_path = ""
ABE8e_sig2_motifs_path = ""

# --- evoCDA inputs (same roles as the ABE8e entries above) ---
evoCDA_surf_path = "" + "/tracks_for_plotting/"
evoCDA_sigma_hit_path = ""
evoCDA_baseline_editing_efficiency_path = ""
evoCDA_enriched_editing_efficiency_path = ""
evoCDA_sig1_motifs_path = ""

# Optional phyloP track: only plotted when the file exists.
phyloP_path = ""

# Data Processing

In [None]:
# Turn the ABE8e Millipede table into track files written next to the input:
#   * ABE8e_MillipedeScores_edit_A>G.bedgraph / ..._T>C.bedgraph
#       columns: chr, start, end, coefficient, edit name, PIP (tab-separated, no header)
#   * ABE8e_PIP.bedgraph
#       columns: chr, start, end, max PIP over the two edit files at that position
#
# NOTE(review): ">" is not a valid file-name character on Windows. The names are kept
# because the plotting cell reads these exact paths.

abe8e_chrom = "chr16"
# Added to the integer position parsed from each feature name.
# NOTE(review): presumably the genomic start of the amplicon - confirm.
abe8e_position_offset = 28930687
abe8e_out_dir = os.path.dirname(ABE8e_sigma_hit_path)

# The first CSV column becomes the index and is then exposed again as a regular
# column; the code below expects that column to be called "index".
Abe8eScoresDF = pd.read_csv(ABE8e_sigma_hit_path, index_col=0).reset_index(drop=False)

# Split each feature name into: leading digits, one capital letter, one separator
# character ("-", ">" or "|"), and either "-" or one capital letter.
# Raw string: "\d" in a normal string literal is an invalid escape sequence.
Abe8eScoresDF[['MainIndex', 'Base', 'Waste', 'BaseChange']] = Abe8eScoresDF['index'].str.extract(
    r'(^\d+)([A-Z])([->|-])([-]|[A-Z])'
)
# Drop rows with any missing value (including names the pattern did not match);
# copy so the column assignments below act on an independent frame.
Abe8eScoresDF = Abe8eScoresDF.dropna().copy()

# A "-" in the last group is replaced by the row's own Base, which makes the row an
# unchanged-base row that is filtered out further down. Vectorised equivalent of the
# former row-wise apply; also well-defined when the frame is empty.
Abe8eScoresDF['BaseChange'] = Abe8eScoresDF['BaseChange'].where(
    Abe8eScoresDF['BaseChange'] != "-", Abe8eScoresDF['Base']
)
Abe8eScoresDF['FinalIndex'] = Abe8eScoresDF['MainIndex'] + Abe8eScoresDF['Base']

Abe8eScoresDFFinal = Abe8eScoresDF[
    ['FinalIndex', 'Coefficient', 'PIP', 'MainIndex', 'Base', 'BaseChange']
].copy()
Abe8eScoresDFFinal['MainIndex'] = Abe8eScoresDFFinal['MainIndex'].astype(int) + abe8e_position_offset
Abe8eScoresDFFinal = Abe8eScoresDFFinal.sort_values(['MainIndex', 'BaseChange'], ascending=True)
# Keep only rows whose base actually changes.
Abe8eScoresDFFinal = Abe8eScoresDFFinal[
    Abe8eScoresDFFinal['Base'] != Abe8eScoresDFFinal['BaseChange']
].reset_index(drop=True)

# 1-based running number of the edits recorded at each position.
Abe8eScoresDFFinal['edit'] = Abe8eScoresDFFinal.groupby('MainIndex').cumcount() + 1

# One track file per edit type. The frames are also kept in memory so the PIP step
# does not have to re-read them (pd.read_csv raises EmptyDataError on the empty file
# that is written when an edit type has no rows).
abe8e_edit_tracks = {}
abe8e_edit_label = Abe8eScoresDFFinal['Base'] + ">" + Abe8eScoresDFFinal['BaseChange']
for edit in ("A>G", "T>C"):
    hits = Abe8eScoresDFFinal[abe8e_edit_label.str.contains(edit)]
    edit_track = pd.DataFrame({
        "chr": abe8e_chrom,
        "start": hits['MainIndex'],
        "end": hits['MainIndex'] + 1,
        "score": hits['Coefficient'],
        "name": hits['Base'] + ">" + hits['BaseChange'],
        "PIP": hits['PIP'],
    })
    edit_track.to_csv(
        f"{abe8e_out_dir}/ABE8e_MillipedeScores_edit_{edit}.bedgraph",
        sep="\t", index=False, header=False,
    )
    abe8e_edit_tracks[edit] = edit_track

# Outer-join the two edit tracks on their interval and keep the larger PIP
# (max skips the NaN of whichever side has no row for that interval).
merged_df = abe8e_edit_tracks["A>G"].merge(
    abe8e_edit_tracks["T>C"],
    on=["chr", "start", "end"],
    how="outer",
    suffixes=("_df1", "_df2"),
)
new_df = merged_df[["chr", "start", "end"]].copy()
new_df['Max_Col6'] = merged_df[["PIP_df1", "PIP_df2"]].max(axis=1)
new_df.to_csv(f"{abe8e_out_dir}/ABE8e_PIP.bedgraph", sep="\t", index=False, header=False)



In [None]:
# Turn the evoCDA Millipede table into track files written next to the input:
#   * evoCDA_MillipedeScores_edit_C>T.bedgraph / ..._G>A.bedgraph
#       columns: chr, start, end, coefficient, edit name, PIP (tab-separated, no header)
#   * evoCDA_PIP.bedgraph
#       columns: chr, start, end, max PIP over the two edit files at that position
#
# NOTE(review): ">" is not a valid file-name character on Windows. The names are kept
# because the plotting cell reads these exact paths.

evocda_chrom = "chr16"
# Added to the integer position parsed from each feature name.
# NOTE(review): presumably the genomic start of the amplicon - confirm.
evocda_position_offset = 28930687
evocda_out_dir = os.path.dirname(evoCDA_sigma_hit_path)

# The first CSV column becomes the index and is then exposed again as a regular
# column; the code below expects that column to be called "index".
EvoCDAScoresDF = pd.read_csv(evoCDA_sigma_hit_path, index_col=0).reset_index(drop=False)

# Split each feature name into: leading digits, one capital letter, one separator
# character ("-", ">" or "|"), and either "-" or one capital letter.
# Raw string: "\d" in a normal string literal is an invalid escape sequence.
EvoCDAScoresDF[['MainIndex', 'Base', 'Waste', 'BaseChange']] = EvoCDAScoresDF['index'].str.extract(
    r'(^\d+)([A-Z])([->|-])([-]|[A-Z])'
)
# Drop rows with any missing value (including names the pattern did not match);
# copy so the column assignments below act on an independent frame.
EvoCDAScoresDF = EvoCDAScoresDF.dropna().copy()

# A "-" in the last group is replaced by the row's own Base, which makes the row an
# unchanged-base row that is filtered out further down. Vectorised equivalent of the
# former row-wise apply; also well-defined when the frame is empty.
EvoCDAScoresDF['BaseChange'] = EvoCDAScoresDF['BaseChange'].where(
    EvoCDAScoresDF['BaseChange'] != "-", EvoCDAScoresDF['Base']
)
EvoCDAScoresDF['FinalIndex'] = EvoCDAScoresDF['MainIndex'] + EvoCDAScoresDF['Base']

EvoCDAScoresDFFinal = EvoCDAScoresDF[
    ['FinalIndex', 'Coefficient', 'PIP', 'MainIndex', 'Base', 'BaseChange']
].copy()
EvoCDAScoresDFFinal['MainIndex'] = EvoCDAScoresDFFinal['MainIndex'].astype(int) + evocda_position_offset
EvoCDAScoresDFFinal = EvoCDAScoresDFFinal.sort_values(['MainIndex', 'BaseChange'], ascending=True)
# Keep only rows whose base actually changes.
EvoCDAScoresDFFinal = EvoCDAScoresDFFinal[
    EvoCDAScoresDFFinal['Base'] != EvoCDAScoresDFFinal['BaseChange']
].reset_index(drop=True)

# One track file per edit type. The frames are also kept in memory so the PIP step
# does not have to re-read them (pd.read_csv raises EmptyDataError on the empty file
# that is written when an edit type has no rows).
evocda_edit_tracks = {}
evocda_edit_label = EvoCDAScoresDFFinal['Base'] + ">" + EvoCDAScoresDFFinal['BaseChange']
for edit in ("C>T", "G>A"):
    hits = EvoCDAScoresDFFinal[evocda_edit_label.str.contains(edit)]
    edit_track = pd.DataFrame({
        "chr": evocda_chrom,
        "start": hits['MainIndex'],
        "end": hits['MainIndex'] + 1,
        "score": hits['Coefficient'],
        "name": hits['Base'] + ">" + hits['BaseChange'],
        "PIP": hits['PIP'],
    })
    edit_track.to_csv(
        f"{evocda_out_dir}/evoCDA_MillipedeScores_edit_{edit}.bedgraph",
        sep="\t", index=False, header=False,
    )
    evocda_edit_tracks[edit] = edit_track

# Outer-join the two edit tracks on their interval and keep the larger PIP
# (max skips the NaN of whichever side has no row for that interval).
merged_df = evocda_edit_tracks["C>T"].merge(
    evocda_edit_tracks["G>A"],
    on=["chr", "start", "end"],
    how="outer",
    suffixes=("_df1", "_df2"),
)
new_df = merged_df[["chr", "start", "end"]].copy()
new_df['Max_Col6'] = merged_df[["PIP_df1", "PIP_df2"]].max(axis=1)
new_df.to_csv(f"{evocda_out_dir}/evoCDA_PIP.bedgraph", sep="\t", index=False, header=False)

# Plotting

In [None]:
# ---------------------------------------------------------------------------
# Styling constants
# ---------------------------------------------------------------------------
Rep1Color = "blue"
Rep2Color = "red"
Rep3Color = "green"
NegColor = "#949494"

ABE8ePosBetaColor = "green"
ABE8eNegBetaColor = "fuchsia"
evoCDAPosBetaColor = "blue"
evoCDANegBetaColor = "orange"

SigRegionColor = "black"
EditingColor = "black"
CircleSize = 4
NSLineSize = 6


# ---------------------------------------------------------------------------
# Track builders
#
# Both editors use the same set of tracks, differing only in names, paths and
# colours. Each helper passes exactly the keyword arguments the track needs;
# arguments that are not listed are left to the library defaults.
# NOTE(review): share_with_previous=True presumably overlays a track on the
# row of the preceding one - the number of height_props entries further down
# is consistent with that reading.
# ---------------------------------------------------------------------------
def _legend(group):
    """Return the plotly options that put a trace into legend group `group`."""
    return {"legendgroup": group, "legendgrouptitle_text": group}


def _sharing(position):
    """Return share_with_previous=True for every track but the first of a group."""
    return {"share_with_previous": True} if position else {}


def _surf_tracks(editor, surf_dir):
    """Build the SURF tracks for one editor.

    Args:
        editor: label used as prefix of track names and legend groups.
        surf_dir: string prefix (ending in a path separator) to which the
            track file names are appended.

    Returns:
        List of six tracks: negative-control raw scores, replicate 1-3 raw
        scores (each with share_with_previous=True), deconvolved scores and
        significant regions.
    """
    legend = _legend(f"{editor} SURF")
    built = [
        TrackFactory.create_track(
            file_path=f"{surf_dir}neg_raw_scores.bedgraph",
            track_name=f"{editor} Neg Raw Scores",
            plot_type="points",
            plotly_options={
                "marker_color": NegColor,
                "marker_size": CircleSize,
                "line_width": 0,
                "opacity": 0.5,
                **legend,
            },
        )
    ]
    for rep, color in ((1, Rep1Color), (2, Rep2Color), (3, Rep3Color)):
        # Only the last replicate carries the explicit y-range.
        y_range = {"ymin": -2.5, "ymax": 2.5} if rep == 3 else {}
        built.append(
            TrackFactory.create_track(
                file_path=f"{surf_dir}rep{rep}_raw_scores.bedgraph",
                track_name=f"{editor} Rep{rep} Raw Scores",
                share_with_previous=True,
                plot_type="points",
                **y_range,
                plotly_options={
                    "marker_color": color,
                    "marker_size": CircleSize,
                    **legend,
                },
            )
        )
    built.append(
        TrackFactory.create_track(
            file_path=f"{surf_dir}deconvolved_scores.bedgraph",
            track_name=f"{editor} Deconvolved Scores",
            plot_type="lines",
            plotly_options={
                "line_color": "#a6cee3",
                "fillcolor": "#a6cee3",
                **legend,
            },
        )
    )
    built.append(
        TrackFactory.create_track(
            file_path=f"{surf_dir}significant_regions.bed",
            track_name=f"{editor} Significant Regions",
            plotly_plot_options={
                "fillcolor": SigRegionColor,
                "line_width": 2,
                "line_color": "black",
                **_legend(f"{editor} Sig. Regions"),
            },
        )
    )
    return built


def _millipede_tracks(editor, sigma_hit_path, beta_edits, baseline_path, enriched_path):
    """Build the Millipede tracks for one editor.

    Args:
        editor: label used in track names, legend group and derived file names.
        sigma_hit_path: Millipede table path; the derived .bedgraph files are
            looked up in its directory.
        beta_edits: sequence of (edit, colour) pairs, e.g. ("A>G", "green").
            The first pair starts a new track, later ones use
            share_with_previous=True.
        baseline_path: baseline-group editing-efficiency track file.
        enriched_path: enriched-group editing-efficiency track file.

    Returns:
        List of tracks: one beta track per edit, the PIP bars, and the
        baseline/enriched editing-efficiency lines.
    """
    legend = _legend(f"{editor} Millipede")
    track_dir = os.path.dirname(sigma_hit_path)
    built = []
    for position, (edit, color) in enumerate(beta_edits):
        built.append(
            TrackFactory.create_track(
                file_path=f"{track_dir}/{editor}_MillipedeScores_edit_{edit}.bedgraph",
                track_name=f"{editor} {edit} Millipede Betas",
                **_sharing(position),
                plot_type="points",
                plotly_options={
                    "marker_color": color,
                    "marker_symbol": "line-ns",
                    "marker_size": NSLineSize,
                    "marker_line_width": 2,
                    "marker_line_color": color,
                    **legend,
                },
            )
        )
    built.append(
        TrackFactory.create_track(
            file_path=f"{track_dir}/{editor}_PIP.bedgraph",
            track_name=f"{editor} Millipede PIPs",
            plot_type="bars",
            plotly_options={
                "marker_color": "#a6cee3",
                **legend,
            },
        )
    )
    efficiency_specs = (
        ("Baseline", baseline_path, "red"),
        ("Enriched", enriched_path, "blue"),
    )
    for position, (group_name, efficiency_path, color) in enumerate(efficiency_specs):
        built.append(
            TrackFactory.create_track(
                file_path=efficiency_path,
                track_name=f"{editor} {group_name} Group Editing Efficiency",
                **_sharing(position),
                plot_type="lines",
                plotly_options={
                    "line_color": color,
                    "line_width": 1,
                    "fillcolor": "rgba(0,0,0,0)",
                    **legend,
                },
            )
        )
    return built


# ---------------------------------------------------------------------------
# Assemble the track list (order defines the order of rows in the figure)
# ---------------------------------------------------------------------------
tracks = [
    *_surf_tracks("ABE8e", ABE8e_SURF_path),
    *_surf_tracks("evoCDA", evoCDA_surf_path),
    *_millipede_tracks(
        "ABE8e",
        ABE8e_sigma_hit_path,
        (("A>G", ABE8ePosBetaColor), ("T>C", ABE8eNegBetaColor)),
        ABE8e_baseline_editing_efficiency_path,
        ABE8e_enriched_editing_efficiency_path,
    ),
    *_millipede_tracks(
        "evoCDA",
        evoCDA_sigma_hit_path,
        (("C>T", evoCDAPosBetaColor), ("G>A", evoCDANegBetaColor)),
        evoCDA_baseline_editing_efficiency_path,
        evoCDA_enriched_editing_efficiency_path,
    ),
]

# Optional motif tracks: a track is added only when its file exists. The first
# candidate never passes share_with_previous; later candidates pass True only if
# an earlier motif track was added.
motif_specs = (
    ("ABE8e Sig1 Motifs", ABE8e_sig1_motifs_path),
    ("ABE8e Sig2 Motifs", ABE8e_sig2_motifs_path),
    ("evoCDA Sig1 Motifs", evoCDA_sig1_motifs_path),
)
has_motif_row = False
for position, (motif_name, motif_path) in enumerate(motif_specs):
    if not os.path.exists(motif_path):
        continue
    share = {"share_with_previous": has_motif_row} if position else {}
    tracks.append(
        TrackFactory.create_track(
            file_path=motif_path,
            track_name=motif_name,
            **share,
            rect_padding=0.1,
            plotly_plot_options={
                "fillcolor": "blue",
                "line_color": "black",
                "line_width": 0,
                **_legend("Motifs"),
            },
        )
    )
    has_motif_row = True

# Optional phyloP track.
has_phylop = os.path.exists(phyloP_path)
if has_phylop:
    tracks.append(
        TrackFactory.create_track(
            file_path=phyloP_path,
            track_name="PhyloP Conservation Scores",
            plot_type="bars",
            plotly_options={"marker_color": "black"},
        )
    )

tracks.append(
    TrackFactory.create_track(
        track_name="Bottom Axis", track_type="axis", axis_type="verbose"
    )
)

# The plotter is given a 2-D array of tracks: one column, one entry per track.
tracks = np.array(tracks).reshape(-1, 1)

# Every track that is neither a BED track nor the x-axis gets a thin horizontal
# line at y = 0.
for track in tracks.ravel():
    if not isinstance(track, (jdg.BedTrack, jdg.XAxisTrack)):
        track.hlines = [0]
        track.plotly_hline_options = {"line_width": 0.5}

# Relative heights, in track order (dict insertion order is what is passed on).
# Tracks created with share_with_previous do not get their own entry.
height_props = {
    "raw_scores1": 1.5,
    "deconv_score1": 1,
    "sig_regions1": 0.1,
    "raw_scores2": 1.5,
    "deconv_score2": 1,
    "sig_regions2": 0.1,
    "betas1": 1.5,
    "pips1": 1,
    "editing1": 1,
    "betas2": 1.5,
    "pips2": 1,
    "editing2": 1,
    "motifs": 2.5,
    "convscores": 1,
    "axis": 0.1,
}

# Drop the heights of optional tracks that were not added above.
if not has_motif_row:
    height_props.pop("motifs")
if not has_phylop:
    height_props.pop("convscores")

# Plot using PlotlyPlotter
tdp = jdg.PlotlyPlotter(tracks, total_height=1000, total_width=800)
fig = tdp.plot_all_tracks(
    column_titles=["Mismatch Corrected"],
    height_props=list(height_props.values()),
    layout_options={"legend_groupclick": "toggleitem"},
)


fig.show()