# Workflow

In [None]:
import logging
import sys
from pathlib import Path

# The root logger is configured at DEBUG level; the commented line below is the
# quieter INFO alternative.
# logging.basicConfig(level=logging.INFO,)
logging.basicConfig(
    level=logging.DEBUG,
)

import warnings

import pandas as pd
from Bio import BiopythonDeprecationWarning
from roux.lib.io import read_table, to_dict, to_table
from roux.lib.sys import makedirs

# Suppress BiopythonDeprecationWarning
warnings.filterwarnings("ignore", category=BiopythonDeprecationWarning)
# Suppress FutureWarning (the filter is category-wide, so it silences every
# library's FutureWarning, not only the ones raised by pandas)
warnings.filterwarnings("ignore", category=FutureWarning)

In [None]:
## parameters
# NOTE(review): the `None` values are placeholders; presumably they are overridden by
# whatever executes this notebook with parameters — confirm against the caller.
method = None  #'Cas12a-BE' # one method for one run
input_path = None  #'inputs/mutations.tsv'
output_path = None  #'outputs/05_small.tsv'
output_dir_path = None

species_name = None  ## for pyensembl
release = None

genome_path = None  #'inputs/dna.fa'
gtf_path = None  #'inputs/ann.gtf'
transcript_path = None  # "inputs/RNA.fa"
protein_path = None  # "inputs/Protein.fa"

search_window = (
    None  ## bases left and right of the target to define the region to design guides in
)
not_be = False  # non-base editing applications (skips filtering by the editable base)

## offtargets
alignments_max = 3

## default
output_fn = "05_small"
output_ext = None
wd_path = None
igv_path_prefix = None
threads = 1
force = False
dbug = False
verbose = False
test = None  # to suppress the "Passed unknown parameter: test" warning

## Inputs for the workflow

In [None]:
## inferred parameters
# Exactly one of `output_path` / `output_dir_path` must be given; the other one is derived.
assert (
    output_path is not None or output_dir_path is not None
), "either output_dir_path or output_path are required"
assert (
    output_path is None or output_dir_path is None
), "either output_dir_path or output_path are required, not both"

if output_path is None and output_dir_path is not None:
    # only the directory was given: the final table is placed inside it
    output_path = f"{output_dir_path}/{output_fn}.tsv"
    logging.info(f"Output path:{output_path}")
elif output_path is not None and output_dir_path is None:
    # only the file was given: its parent directory holds the intermediate files
    output_dir_path = Path(output_path).parent
    logging.info(f"Output directory path:{output_dir_path}")

# extension used for the intermediate tables
output_ext = "tsv" if output_ext is None else output_ext

## paths of files within the package
from beditor.lib.utils import get_src_path

bwa_path = f"{get_src_path()}/bwa/bwa"
methods_path = f"{get_src_path()}/data/methods.tsv"  #'../beditor/data/dbepams.tsv'
# fail early, showing the offending path, if the packaged files are missing
assert Path(bwa_path).exists(), bwa_path
assert Path(methods_path).exists(), methods_path

In [None]:
# Optionally change the working directory to `wd_path`; relative paths used by the
# following cells are then resolved against it.
if wd_path is not None:
    import os

    os.chdir(wd_path)
    logging.debug(f"cwd={wd_path}")

### Output paths

In [None]:
# Registry of the files written by the workflow, keyed by a short name.
# The numeric prefixes (00_ ... 05_) follow the order of the steps in this notebook.
output_paths = pd.Series(
    dict(
        output_dir_path=str(output_dir_path),
        input_species=f"{output_dir_path}/00_inputs/species.yaml",
        input_pams=f"{output_dir_path}/00_inputs/pam.tsv",
        input_mutations=f"{output_dir_path}/00_inputs/mutations.{output_ext}",
        protein_pos_dir=f"{output_dir_path}/00_inputs/protein_positions",
        sequences=f"{output_dir_path}/01_sequences.{output_ext}",
        targets_bed=f"{output_dir_path}/01_sequences/targets.bed",
        flanks_bed=f"{output_dir_path}/01_sequences/flanks.bed",
        pam_searches=f"{output_dir_path}/03_guides/00_pam_searches.{output_ext}",
        guides_filtered=f"{output_dir_path}/03_guides/01_guides_filtered.{output_ext}",
        guides=f"{output_dir_path}/03_guides.{output_ext}",
        alignments=f"{output_dir_path}/04_offtargets/00_alignments.{output_ext}",
        alignments_extra_bed=f"{output_dir_path}/04_offtargets/00_alignments_extra.bed",
        alignment_penalties=f"{output_dir_path}/04_offtargets/01_alignments_penalties.{output_ext}",
        alignments_mapped=f"{output_dir_path}/04_offtargets/01_mappedby_alignments.{output_ext}",
        pam_bed=f"{output_dir_path}/04_offtargets/pams.bed",
        offtargets=f"{output_dir_path}/04_offtargets.{output_ext}",
        output_full=f"{output_dir_path}/05_full.{output_ext}",
        output=output_path,
    )
)
# keep a record of the registry next to the outputs
to_dict(output_paths.to_dict(), f"{output_dir_path}/output_paths.yaml")

# Decide whether anything has to be (re)computed.
output_paths_exist = output_paths.apply(lambda x: Path(x).exists())
if not force:
    if output_paths_exist.all():
        # everything is already there: stop the run here
        logging.warning("Output exists. Use force to overwrite them.")
        sys.exit(0)
    elif output_paths_exist.any():
        # report as "<existing>/<total>"; the count is not divided, so the message
        # reads e.g. "3/19 outputs" rather than a fraction
        logging.warning(
            f"Output exists for {output_paths_exist.sum()}/{len(output_paths_exist)} outputs. Use force to overwrite them."
        )
    # output_paths[output_paths_exist]=''

## Input data

In [None]:
# The genome is described either by the Ensembl species + release, or by at least two
# of the custom file paths.
assert all(k is not None for k in [species_name, release]) or (
    sum(k is not None for k in [genome_path, gtf_path, transcript_path, protein_path])
    >= 2
), "Either Ensembl genome information or the paths to the files are needed (not both). "
# a custom genome is assumed only when neither Ensembl parameter is provided
custom_genome = all(k is None for k in [species_name, release])

### Annotations

Note: for a given species of interest, generate the `data` object by referring to the documentation of [`pyensembl`](https://github.com/openvax/pyensembl?tab=readme-ov-file#installation)

In [None]:
from beditor.lib.utils import get_annots

# Annotation handle reused by later cells (GTF path lookup, mutation coordinates,
# alignment penalties).
# NOTE(review): presumably a pyensembl genome built from either the Ensembl
# species/release or the custom file paths — confirm in `get_annots`.
annots = get_annots(
    species_name=species_name,
    release=release,
    gtf_path=gtf_path,
    transcript_path=transcript_path,
    protein_path=protein_path,
    reference_name="assembly",
    annotation_name="source",
    # **kws_Genome,
)

In [None]:
## gtf file needed for the visualizations e.g. igv
# Resolution order: user-provided path, then the path known to `annots`,
# then a download from the url known to `annots`.
if gtf_path is None:
    gtf_path = annots.gtf_path
if gtf_path is None:
    from beditor.lib.io import to_downloaded_cached_path

    gtf_path = to_downloaded_cached_path(
        annots=annots,
        url=annots.gtf_url,
    )
assert Path(gtf_path).exists()

### Genome sequence 

In [None]:
# When Ensembl parameters were given (i.e. not a custom genome), `genome_path` is
# replaced by the path returned by `download_genome`.
if not custom_genome:
    from beditor.lib.io import download_genome

    genome_path = download_genome(
        species=species_name,
        ensembl_release=release,
        # force=force,
        verbose=True,
    )
    assert Path(genome_path).exists()

In [None]:
%%time
genome_is_large = Path(genome_path).stat().st_size > 200000000
# if genome_is_large:
    # logging.info(
    #     "indexing large genome. this is a time-taking step, but would save time subsequently."
    # )
from beditor.lib.io import to_2bit,to_fasta_index
try: 
    to_2bit(
        genome_path
    )
except:
    logging.warning(
"""
For fast processing of large genomes (highly recommended for human genome):

```
conda install install bioconda::ucsc-fatotwobit bioconda::ucsc-twobittofa bioconda::ucsc-twobitinfo # options: conda/mamba
```
"""    
    )
genome_path = to_fasta_index(
    genome_path=genome_path,
    bgzip_path=None,
    threads=threads,
    verbose=True,
    force=False,
    indexed=False,
)
logging.info(f"genome_path={genome_path}")
# else:
#     # genome_is_large=False
#     from beditor.lib.io import read_genome

#     genome = read_genome(genome_path)

In [None]:
# Record the resolved genome and GTF paths among the inputs of this run.
to_dict(
    dict(
        genome_path=genome_path,
        gtf_path=gtf_path,
    ),
    output_paths["input_species"],
)

### Method

In [None]:
# Table of methods shipped with the package (tab-separated).
# NOTE(review): `params` is presumably forwarded to the pandas reader, in which case
# `keep_default_na=False` keeps empty strings instead of turning them into NaN — confirm.
dbepams = read_table(methods_path, params=dict(sep="\t", keep_default_na=False))

In [None]:
from beditor.lib.methods import get_be2dpam

# PAM table of the selected method: the result is looked up by `method` and its index
# is reset before the table is saved among the inputs.
dpam = get_be2dpam(dbepams, methods=[method], test=True)[method].reset_index(drop=True)
to_table(
    dpam,
    # f'{output_dir_path}/00_inputs/pam.tsv'
    output_paths["input_pams"],
)

In [None]:
# Default search window: twice the guide length of the first row of the PAM table.
if search_window is None:
    search_window = int(2 * dpam["guide length"].iloc[0])
    logging.info(f"search_window={search_window}")

In [None]:
# preview of the PAM table of the selected method
dpam.head()

In [None]:
# Configuration of the method: the first row matching `method` in the methods table,
# overlaid with the first '+' strand row of the PAM table (the latter wins on shared keys).
cfg_method = dbepams.query(f"`method` == '{method}'").iloc[0].to_dict()
cfg_method.update(dpam.query("`strand` == '+'").iloc[0].to_dict())

In [None]:
# Derived lengths: the PAM length is taken from the PAM string itself, and the combined
# length is the sum of the guide and PAM lengths.
cfg_method["PAM length"] = len(cfg_method["PAM"])
cfg_method["guide+PAM length"] = cfg_method["guide length"] + cfg_method["PAM length"]
cfg_method

In [None]:
# keep the method configuration with the inputs of this run
to_dict(cfg_method, f"{output_dir_path}/00_inputs/pam.yaml")

### Mutations

In [None]:
# The mutations can be given as a path to a table (str or Path) or directly as a
# DataFrame; anything else is rejected with an explicit error instead of leaving
# `df0` undefined.
if isinstance(input_path, (str, Path)):
    df0 = read_table(str(input_path), ext='tsv')
elif isinstance(input_path, pd.DataFrame):
    df0 = input_path
else:
    raise TypeError(
        f"input_path should be a path to a table or a pandas DataFrame, found {type(input_path).__name__}"
    )
df0 = df0.log.drop_duplicates()
df0.head(1)

In [None]:
# Infer the kind of input from the columns that are present.
# format: "protein" if a `protein id` column exists, otherwise "base".
# type:   "point" if a `mutation` column exists,
#         "position" if the end column (`aa end` / `end`) is absent,
#         "region" otherwise.
# The end column is tested for membership in `df0`; testing the truthiness of the
# column name itself would always be False and "position" could never be selected.
mutation_format = "protein" if "protein id" in df0 else "base"
if "mutation" in df0:
    mutation_type = "point"
elif ("aa end" if mutation_format == "protein" else "end") not in df0:
    mutation_type = "position"
else:
    mutation_type = "region"
# point mutations require base-editing filters, so `not_be` is overridden
if mutation_type == "point" and not_be:
    not_be = False
    logging.warning("not_be set to False because input contains point mutations")

In [None]:
# For nucleotide point mutations, keep only the rows whose `mutation` equals the
# nucleotide mutation of the method (`df0`); the remaining rows (`df0_`) are dropped.
# Both subsets are written next to the standardised mutations table.
if mutation_format == "base" and mutation_type == "point":
    df0, df0_ = (
        df0.log.query(expr=f"`mutation` == '{cfg_method['nucleotide mutation']}'"),
        df0.log.query(expr=f"`mutation` != '{cfg_method['nucleotide mutation']}'"),
    )
    to_table(
        df0,
        Path(output_paths["input_mutations"]).with_suffix("").as_posix()
        + "_with_nt_mutation.tsv",
    )
    to_table(
        df0_,
        Path(output_paths["input_mutations"]).with_suffix("").as_posix()
        + "_dropped_no_nt_mutation.tsv",
    )

### Standardise input mutations in terms of the genome co-ordinates

In [None]:
# outp=f'{output_dir_path}/00_inputs/mutations.{output_ext}'
if Path(output_paths["input_mutations"]).exists() and not force:
    # reuse the table saved by a previous run
    df1 = read_table(output_paths["input_mutations"])
else:
    from beditor.lib.get_mutations import get_mutation_coords

    df1 = get_mutation_coords(
        df0,
        annots,
        search_window,
        # an output directory is passed only for protein inputs in base-editing mode
        outd=output_paths["protein_pos_dir"]
        if not (not_be or mutation_format == "base")
        else None,
        force=force,
    )
    to_table(df1, output_paths["input_mutations"])
df1.head(1)

## Get flanking sequence

In [None]:
%%time
# outp=f'{output_dir_path}/01_sequences.{output_ext}'
if not Path(output_paths["sequences"]).exists() or force:
    from beditor.lib.utils import get_flanking_seqs

    df2 = get_flanking_seqs(
        df1,
        # fast=genome_is_large,
        # fast=False,
        genome=genome_path,  # if genome_is_large else genome,
        targets_path=output_paths["targets_bed"],
        flanks_path=output_paths["flanks_bed"],
        search_window=search_window,
    )

    if mutation_format == "protein" and mutation_type != "region":
        df2, df2_ = (
            df2.log.query(expr="`sequence target`.str.len() == 3"),
            df2.log.query(expr="`sequence target`.str.len() != 3"),
        )
        if len(df2_) != 0:
            logging.warning(
                f"filtered out guides without editable nucleotide: {len(df2_)}, Check: "
                + to_table(
                    df2_,
                    Path(output_paths["sequences"]).with_suffix("").as_posix()
                    + "/dropped_possible_splice_junction.tsv",
                )
            )
        # If the length is the same as the target sequence test equality with the codon sequence
        df_ = df1.loc[
            :,
            [
                "target location",
                "sequence target codon",
            ],
        ].merge(
            right=df2.loc[
                :,
                [
                    "target location",
                    "sequence target",
                ],
            ],
            on="target location",
            how="inner",
        )
        assert df_.apply(
            lambda x: x["sequence target codon"] == x["sequence target"], axis=1
        ).all(), df_.query(expr="`sequence target codon`!=`sequence target`")

    assert all(df2["start flanking"] < df2["end flanking"])
    to_table(df2, output_paths["sequences"])
else:
    df2 = read_table(output_paths["sequences"])
df2.head(1)

In [None]:
# For nucleotide point mutations, keep the targets whose sequence is the editable
# nucleotide of the method or its reverse complement (`df2`); the rest (`df2_`) is dropped.
# Both subsets are saved in the directory that holds the targets bed file.
if mutation_format == "base" and mutation_type == "point":
    from beditor.lib.utils import str2seq

    df2, df2_ = (
        df2.log.query(
            expr=f"`sequence target` == ['{cfg_method['nucleotide']}','{str2seq(cfg_method['nucleotide']).reverse_complement()}']"
        ),
        df2.log.query(
            expr=f"`sequence target` != ['{cfg_method['nucleotide']}','{str2seq(cfg_method['nucleotide']).reverse_complement()}']"
        ),
    )
    to_table(
        df2,
        Path(output_paths["targets_bed"]).parent.as_posix()
        + "/mutations_with_nt_editable.tsv",
    )
    to_table(
        df2_,
        Path(output_paths["targets_bed"]).parent.as_posix()
        + "/mutations_dropped_no_nt_editable.tsv",
    )
    logging.info(df2["sequence target"].value_counts())
df2.head(1)

## Design

In [None]:
# outp=f'{output_dir_path}/03_guides/00_pam_searches.{output_ext}'
if Path(output_paths["pam_searches"]).exists() and not force:
    # reuse the table saved by a previous run
    df3 = read_table(output_paths["pam_searches"])
else:
    from beditor.lib.make_guides import get_guides

    df3 = get_guides(df2, dpam, guide_len=cfg_method["guide length"])
    to_table(df3, output_paths["pam_searches"])
df3.head(1)

### Map to the mutation

In [None]:
from beditor.lib.make_guides import get_distances

# Combine the target sequences (`df2`) with the PAM searches (`df3`) using the
# configuration of the method.
# NOTE(review): the columns added by `get_distances` are not visible here — see the helper.
df4 = get_distances(
    df2,
    df3,
    cfg_method,
)
df4.head(1)

## Filter based on base-editing

### If the base exists in the activity window

In [None]:
# Base-editing only: `filter_guides` returns the kept (`df4`) and the dropped (`df4_`)
# guides; the kept ones are saved and the dropped ones are reported with their count.
if not not_be:
    from beditor.lib.make_guides import filter_guides

    # %run ../beditor/lib/make_guides.py
    df4, df4_ = filter_guides(df4, cfg_method)
    to_table(
        df4,
        output_paths["guides_filtered"],
    )
    if len(df4_) != 0:
        # the warning message is completed with the value returned by `to_table`
        logging.warning(
            f"filtered out guides without editable nucleotide: {df4_['guide sequence'].nunique()}, Check: "
            + to_table(
                df4_,
                str(Path(output_paths["guides_filtered"]).with_suffix(""))
                + "_dropped.tsv",
            )
        )
df4.head(1)

### If window overlaps target

In [None]:
# Base-editing only: for every guide, the result of `get_window_target_overlap` is stored
# in a temporary column and unpacked by position into four columns (overlap flag,
# overlap sequence, editable flag, overlap locus); the temporary column is then dropped.
# Guides whose overlap is not editable are split off into `df5_` and saved.
# Without base editing, `df5` is a plain copy of `df4`.
if not not_be:
    from beditor.lib.make_guides import get_window_target_overlap

    # %run ../beditor/lib/make_guides.py
    df5 = df4.assign(
        **{
            "window_overlaps_the_target,wts,nt_in_overlap,wtl": lambda df: df.apply(
                lambda x: get_window_target_overlap(
                    tstart=x["start"],
                    tend=x["end"],
                    wl=x["window locus"],
                    ws=x["window sequence"],
                    nt=cfg_method["nucleotide"],
                    # verbose=True,
                ),
                axis=1,
            ),
            # missing flags are treated as False
            "window target overlap": lambda df: df[
                "window_overlaps_the_target,wts,nt_in_overlap,wtl"
            ]
            .apply(lambda x: x[0])
            .fillna(False),
            "window target overlap sequence": lambda df: df[
                "window_overlaps_the_target,wts,nt_in_overlap,wtl"
            ].apply(lambda x: x[1]),
            "window target overlap editable": lambda df: df[
                "window_overlaps_the_target,wts,nt_in_overlap,wtl"
            ]
            .apply(lambda x: x[2])
            .fillna(False),
            "window target overlap locus": lambda df: df[
                "window_overlaps_the_target,wts,nt_in_overlap,wtl"
            ].apply(lambda x: x[3]),
        },
    ).drop(["window_overlaps_the_target,wts,nt_in_overlap,wtl"], axis=1)
    df5, df5_ = (
        df5.log.query(expr="`window target overlap editable`==True"),
        df5.log.query(expr="`window target overlap editable`==False"),
    )
    if len(df5_) != 0:
        logging.warning(
            f"filtered out guides without editable nucleotide in window-target overlap: {df5_['guide sequence'].nunique()}, Check: "
            + to_table(
                df5_,
                str(Path(output_paths["guides_filtered"]).with_suffix(""))
                + "_dropped_target_not_in_window_target_overlap.tsv",
            )
        )
else:
    df5 = df4.copy()
df5.head(1)

In [None]:
# For nucleotide point mutations every window-target overlap must be a single nucleotide;
# on failure the assertion reports how many rows do satisfy the condition.
if mutation_format == "base" and mutation_type == "point":
    assert (df5["window target overlap sequence"].str.len() == 1).all(), (
        df5["window target overlap sequence"].str.len() == 1
    ).sum()

### If the edited mutation matches target mutation

In [None]:
# Base-editing only: apply the edit of the method to the window-target overlap.
if not not_be:
    from roux.lib.str import get_bracket

    from beditor.lib.utils import str2seq

    ## get edited sequence
    df5 = df5.assign(
        **{
            # every occurrence of the editable nucleotide is replaced by its mutation
            "window target overlap edited sequence": lambda df: df[
                "window target overlap sequence"
            ].str.replace(
                cfg_method["nucleotide"],
                cfg_method["nucleotide mutation"],
            ),
            # kept as is when `strand` equals the value extracted from the window locus
            # by `get_bracket`, otherwise complemented (not reverse-complemented)
            # NOTE(review): presumably the bracketed part of the locus is its strand — confirm.
            "edited sequence": lambda df: df.apply(
                lambda x: x["window target overlap edited sequence"]
                if x["strand"] == get_bracket(x["window locus"])
                else str(
                    str2seq(x["window target overlap edited sequence"]).complement()
                ),
                axis=1,
            ),
        }
    ).log("guide locus")
df5.head(1)

In [None]:
# Base-editing of point mutations: attach the requested mutation (and, for protein
# inputs, the protein id, codon and amino acid position) to the guides by joining on
# the target location; duplicates are removed before and after the join.
if not not_be and mutation_type == "point":
    cols_right = ["target location", "mutation"] + (
        ["protein id", "sequence target codon", "aa pos"]
        if mutation_format == "protein"
        else []
    )
    cols_merge = [
        "target location"
    ]  # list(set(df5.columns.tolist()) & set(df1.columns.tolist()))
    df5 = (
        df5.merge(
            right=df1.loc[:, cols_right].log.drop_duplicates(),
            how="inner",
            on=cols_merge,
            #validate="1:m" if mutation_format == "base" else "m:m",
        ).log.drop_duplicates()
    ).log("guide locus")
df5.head(1)

In [None]:
# Base-editing of protein point mutations: stitch the edited sequence into the codon,
# translate it, and keep the guides whose edited amino acid equals the requested mutation.
if not not_be and mutation_format == "protein" and mutation_type == "point":
    from beditor.lib.make_guides import get_mutated_codon
    # imported here so that the cell does not depend on an import made by another cell
    from beditor.lib.utils import str2seq

    # checked before the codons are used; on failure the number of valid codons is reported
    assert (df5["sequence target codon"].str.len() == 3).all(), (
        df5["sequence target codon"].str.len() == 3
    ).sum()
    df5 = df5.assign(
        **{
            "edited codon sequence": lambda df: df.apply(
                lambda x: get_mutated_codon(
                    tel=x["window target overlap locus"],
                    tes=x["edited sequence"],
                    tl=x["target location"],
                    ts=x["sequence target codon"],
                    strand=x["strand"],
                    # verbose=True,
                ),
                axis=1,
            ),
            "edited aa": lambda df: df["edited codon sequence"].apply(
                lambda x: str(str2seq(x).translate())
            ),
        },
    ).log("guide locus")
    # the outer condition already guarantees point mutations
    df5, df5_ = (
        df5.log.query(expr="`mutation`==`edited aa`"),
        df5.log.query(expr="`mutation`!=`edited aa`"),
    )
    if len(df5_) != 0:
        # the message names the filter applied here, matching the name of the file in
        # which the dropped guides are saved
        logging.warning(
            f"filtered out guides whose edited amino acid does not match the target mutation: {df5_['guide sequence'].nunique()}, Check: "
            + to_table(
                df5_,
                str(Path(output_paths["guides_filtered"]).with_suffix(""))
                + "_dropped_edited_does_not_match_mutation.tsv",
            )
        )
df5.head(1)

### Co-edits

In [None]:
# Base-editing only: add a `coedits` column computed per guide from the window and the
# window-target overlap.
# NOTE(review): presumably the other editable nucleotides within the window — confirm
# in `get_coedits_base`.
if not not_be:
    # %run ../beditor/lib/make_guides.py
    from beditor.lib.make_guides import get_coedits_base

    df5 = df5.assign(
        coedits=lambda df: df.apply(
            lambda x: get_coedits_base(
                wl=x["window locus"],
                ws=x["window sequence"],
                wtl=x["window target overlap locus"],
                wts=x["window target overlap sequence"],
                nt=cfg_method["nucleotide"],
                # verbose=True,
            ),
            axis=1,
        )
    )

df5.head(1)

### Save guides

In [None]:
# save the designed guides
to_table(df5, output_paths["guides"])

In [None]:
from beditor.lib.io import to_fasta

# Fasta file of the unique guide sequences; each record is named by its own sequence.
guides_path = to_fasta(
    sequences={seq: seq for seq in df5["guide sequence"].tolist()},
    output_path=f"{output_dir_path}/03_guides.fa",
    molecule_type="RNA",
)

## Score (optional)

### Alignment

In [None]:
%%time
align_path = makedirs(f"{output_dir_path}/04_offtargets/alignment.sam")
if not Path(align_path).exists() or force:
    # %run ../beditor/lib/get_specificity.py
    from beditor.lib.get_specificity import run_alignment

    align_path = run_alignment(
        src_path=bwa_path,
        genomep=genome_path,
        guidesfap=guides_path,
        guidel=cfg_method["guide length"] + len(cfg_method["PAM"]),
        guidessamp=makedirs(f"{output_dir_path}/04_offtargets/alignment.sam"),
        threads=threads,
        verbose=True,
        # test=True,
    )

In [None]:
# Alignments of the guides; `get_alignments` returns two tables, of which the second
# (`df6_`) is saved with the `_dropped_no_pam` suffix. With a saved table and no
# `force`, only `df6` is read back.
if not Path(output_paths["alignments"]).exists() or force:
    from beditor.lib.get_specificity import get_alignments

    df6, df6_ = get_alignments(
        align_path=align_path,
        genome=genome_path,  # if genome_is_large else genome,
        alignments_max=alignments_max,
        pam_pos=cfg_method["PAM position"],
        pam_len=len(cfg_method["PAM"]),
        pam_pattern=cfg_method["rPAM"],
        guide_len=cfg_method["guide length"],
        threads=threads,
        pam_bed_path=output_paths["pam_bed"],
        extra_bed_path=output_paths["alignments_extra_bed"],
        # fast=genome_is_large,
        # fast=False,
    )
    to_table(df6, output_paths["alignments"])
    to_table(
        df6_,
        str(Path(output_paths["alignments"]).with_suffix("")) + "_dropped_no_pam.tsv",
    )
else:
    df6 = read_table(output_paths["alignments"])
df6.head(1)

#### Drop the alignments without PAM

### Calculate scores

In [None]:
# %run ../beditor/lib/get_specificity.py
from beditor.lib.get_specificity import get_penalties, score_alignments, score_guides

#### Per alignment

In [None]:
# Penalties per alignment, computed from the guides, the alignments and the annotations.
# NOTE: `df6_` is reused here; from this cell on it holds the penalties table, not the
# alignments dropped in the previous step.
if not Path(output_paths["alignment_penalties"]).exists() or force:
    df6_ = get_penalties(
        guides=df5,
        aligns=df6,
        annots=annots,
    )
    to_table(
        df6_,
        output_paths["alignment_penalties"],
    )
else:
    df6_ = read_table(output_paths["alignment_penalties"])
df6_.head(1)

In [None]:
# Scores per alignment; `score_alignments` returns the scored table (`df7`) and a second
# table (`_df7`) that is reported as guides without an on-target alignment and saved
# with the `_dropped` suffix, together with the counts of its `aligned XT` values.
if not Path(output_paths["alignments_mapped"]).exists() or force:
    df7, _df7 = score_alignments(
        df6_, pam_len=len(cfg_method["PAM"]), pam_pos=cfg_method["PAM position"]
    )
    to_table(
        df7,
        output_paths["alignments_mapped"],
    )
    if len(_df7) != 0:
        logging.warning(
            f"on-target alignment not found for: {_df7['guide sequence'].nunique()}, Check: "
            + to_table(
                _df7,
                str(Path(output_paths["alignments_mapped"]).with_suffix(""))
                + "_dropped.tsv",
            )
        )
        logging.warning(f"{_df7['aligned XT'].value_counts().to_string()}")
else:
    df7 = read_table(output_paths["alignments_mapped"])
df7.head(1)

#### Per guide

In [None]:
# Scores per guide, from the guides and the per-alignment scores.
df8 = score_guides(
    guides=df5,
    scores=df7,
    not_be=not_be,
)
df8.head(1)

In [None]:
# save the per-guide scores
to_table(
    df8,
    # f'{output_dir_path}/04_offtargets.{output_ext}'
    output_paths["offtargets"],
)

## Outputs

In [None]:
# Debugging aid: list the columns of the three tables that are combined into the output.
if dbug:
    print(df1.columns.tolist())
    print(df5.columns.tolist())
    print(df8.columns.tolist())

In [None]:
# Imported from the installed package rather than executed with `%run` from a relative
# path: the relative path depends on the current working directory (which `wd_path`
# may have changed) and `%run` would load every name of the module into the notebook.
from beditor.lib.io import to_output

# Combine the inputs, the guides and the scores into the full output table.
df9 = to_output(
    inputs=df1,  ## gene, protein etc
    guides=df5,
    scores=df8,
)
df9.head(1)

In [None]:
# save the full output table
to_table(
    df9,
    # f'{output_dir_path}/05_full.{output_ext}'
    output_paths["output_full"],
)

### Stats

In [None]:
# Number of distinct targets (target + mutation for point mutations) present in the
# output, reported against the number of rows of the standardised inputs.
sgs_count = (
    df9[["target location"] + (["mutation"] if mutation_type == "point" else [])]
    .drop_duplicates()
    .shape[0]
)
logging.info(f"sgRNAs are designed for {sgs_count}/{len(df1)} input mutations")

### Reduce the output

In [None]:
# Columns describing the guides that are kept in the reduced output.
cols_output = [
    "chrom",
    "start",
    "end",
    "guide sequence",
    "guide+PAM sequence",
    "guide strand",
    "strand",
    "score",
    "alignments",
    "polyT stretch length",
]
# Input columns first, followed by the guide columns that are not already among them.
# The declared order of `cols_output` is preserved; a set difference would order the
# columns arbitrarily and differently between runs.
df9_ = df9.loc[
    :,
    df0.columns.tolist() + [col for col in cols_output if col not in df0.columns],
]
df9_.head(1)

In [None]:
# save the reduced output at the requested output path
to_table(df9_, output_path)

In [None]:
## display dataframe #todo debug
# (df9_
# # .set_index(df0.columns.tolist())
# .sort_values(['score'],ascending=[False])
# .style
#     .set_properties(subset=['guide sequence','guide+PAM sequence'],**{'font-family': 'Monospace'})
#     .format(subset=['score'],precision=2)
#     .background_gradient(subset=['score'],cmap='Reds_r')
#     # .hide(axis="index",) # error: AttributeError: 'Styler' object has no attribute 'hide'
# )
# df9_.head()

Location coordinates

1-based (#tmp because of pyensembl)

    "sequence target codon"
    "target edited location"
    "start", "end"
    "start PAM", "end PAM"
    
0-based (#todo rename locus)

    "guide location"
    "window locus"
    "aligned location"

## Visualizations

## Library stats

In [None]:
from beditor.lib.viz import get_plot_inputs, plot_library_stats

# Plots of the library, drawn from the full output table.
axs = plot_library_stats(
    dfs=get_plot_inputs(df9),
    not_be=not_be,
)

### Integrative Genomics Viewer (IGV)

In [None]:
from beditor.lib.viz import to_igv

# Path of the IGV session returned by `to_igv`, which is given the annotations, the
# genome and the output directory.
igv_session_path = to_igv(
    # cfg=None,
    gtf_path=gtf_path,
    genome_path=genome_path,
    output_dir_path=output_dir_path,
    # threads=threads,
    output_ext=output_ext,
    force=force,
)
print(igv_session_path)

In [None]:
import igv_notebook

# Load the saved session in an IGV browser embedded in the notebook.
igv_notebook.init()
b = igv_notebook.Browser({}).load_session(path=igv_session_path)