In [14]:
#| include: false
# Setup cell (excluded from the rendered report): imports, plotly defaults,
# working directory, and the default inputs consumed by the cells below.
import os
import pandas as pd
import plotly.io as pio
from plotly.subplots import make_subplots

# Use the notebook renderer and the white template for every plotly figure.
pio.renderers.default = "notebook"
pio.templates.default = 'plotly_white'

# The helper functions called in the cells below are not defined in this
# notebook; they are expected to come from this wildcard import.
from report_helpers import *

# quarto runs the notebook in the 'scripts' directory, while the input paths
# below are relative to its parent directory.
# A bare os.chdir('..') climbs one level higher every time this cell is
# re-run in a live kernel, which breaks every relative path afterwards.
# Resolve the target once to an absolute path and always chdir to that, so
# re-running the cell is a no-op.
if "_report_project_root" not in globals():
    _report_project_root = os.path.abspath("..")
os.chdir(_report_project_root)

# defaults
# NOTE(review): presumably overridden with real inputs when the report is
# rendered for an actual run -- confirm how parameters are injected.
seq_tech = "np"  # sequencing technology code passed to the read-count helpers

# Input tables; paths are relative to the directory selected above.
read_counts = "tests/data/report_inputs/np-2389-test_read-counts.tsv"
assigned_parents = "tests/data/report_inputs/np-2389-test_inc-non-par_parent-counts.tsv.gz"
parent_frequencies = "tests/data/report_inputs/np-2389-test_inc-non-par_assigned-parents_freq.tsv.gz"
breakpoints_per_var = "tests/data/report_inputs/np-2389-test_inc-non-par-pervar.tsv.gz"

# Inputs for the four distance-matrix heatmaps (nt/aa x first/random).
dmat_nt_first = "tests/data/report_inputs/np-2389-test_inc-non-par_first_nt-seq.tsv.gz"
dmat_nt_random = "tests/data/report_inputs/np-2389-test_inc-non-par_random_nt-seq.tsv.gz"
dmat_aa_first = "tests/data/report_inputs/np-2389-test_inc-non-par_first_aa-seq.tsv.gz"
dmat_aa_random = "tests/data/report_inputs/np-2389-test_inc-non-par_random_aa-seq.tsv.gz"


# Read counts

Throughout processing, reads are discarded if they don't pass a number of filters.  The filters applied are:

- Consensus (np-cc only): Removes reads with a consensus that is too long or too short.
- Filtered by repeats (np-cc only): Removes consensus reads that were generated from too few repeats.
- Filtered by reference coverage: Removes reads that don't cover all parental variants in the reference.
- Filtered non-parental variants: Removes any reads that contain at least one allele not matching any of the parents.  For the purpose of this filter, if high-frequency, non-parental variants are retained, they are considered to be parental variants.

The number of reads passing each filter is displayed below.

In [15]:
#| content: valuebox
#| title: Reads passing all filters

# Value-box cell: calls a helper that is not defined in this notebook
# (presumably from report_helpers) with the read-count table path and the
# sequencing technology code.
# NOTE(review): going by its name and the title, it presumably reports the
# fraction of reads that survive every filter -- confirm in report_helpers.
print_fraction_nt_reads_pass(read_counts, seq_tech)

FileNotFoundError: [Errno 2] No such file or directory: 'tests/data/report_inputs/np-2389-test_read-counts.tsv'

In [None]:
#| content: valuebox
#| title: Distinct reads at nucleotide level

# Value-box cell: the helper (not defined in this notebook; presumably from
# report_helpers) is given the read-count table path and the sequencing
# technology code.
# NOTE(review): presumably reports the number of distinct reads at the
# nucleotide level, per the title -- confirm in report_helpers.
print_unique_nt_reads(read_counts, seq_tech)

In [None]:
#| content: valuebox
#| title: Distinct reads at amino acid level

# Value-box cell: same inputs as the nucleotide-level value box (read-count
# table path and sequencing technology code), but a different helper.
# NOTE(review): presumably reports the number of distinct reads at the
# amino-acid level, per the title -- confirm in report_helpers.
print_unique_aa_reads(read_counts, seq_tech)

In [None]:
#| title: Read counts after each filter

# The call is the cell's last expression, so whatever the helper returns is
# what gets displayed (presumably a figure of read counts per filter step --
# confirm in report_helpers).
read_count_graph(read_counts, seq_tech)

In [None]:
#| title: Fraction of reads retained after each filter

# Takes the same read-count table path and sequencing technology code as the
# read-count graph cell; the helper's return value is displayed as the cell
# output (presumably a figure of retained fractions -- confirm in
# report_helpers).
read_fraction_graph(read_counts, seq_tech)

In [None]:
#| title: Assigned parents for top reads

# Build the figure from the parent-count and parent-frequency table paths,
# then display it explicitly through its own show() method.
# NOTE(review): presumably a plotly figure, in which case show() goes through
# the default renderer configured in the setup cell -- confirm.
fig = parent_heatmap(assigned_parents, parent_frequencies)
fig.show()

In [None]:
#| title: Parent frequency

# Unlike the heatmap cells, no show() is called here: the helper's return
# value (or whatever it displays itself) is the cell output.
# Its only input is the parent-frequency table path.
plot_parent_frequencies(parent_frequencies)

In [None]:
#| title: Breakpoint frequency

# Receives the per-variant breakpoint table path and the read-count table
# path.
# NOTE(review): the read counts are presumably used to normalise breakpoint
# counts into frequencies -- confirm in report_helpers.
plot_breakpoints(breakpoints_per_var, read_counts)

In [None]:
#| title: Distance matrix of top reads (nucleotide sequences)

# Heatmap built from the dmat_nt_first input path, then displayed explicitly
# through the returned object's show() method.
fig = make_distance_heatmap(dmat_nt_first)
fig.show()

In [None]:
#| title: Distance matrix of top reads (amino acid sequences)

# Heatmap built from the dmat_aa_first input path, then displayed explicitly
# through the returned object's show() method.
fig = make_distance_heatmap(dmat_aa_first)
fig.show()

In [None]:
#| title: Distance matrix of randomly sampled reads (nucleotide sequences)

# Heatmap built from the dmat_nt_random input path, then displayed explicitly
# through the returned object's show() method.
fig = make_distance_heatmap(dmat_nt_random)
fig.show()

In [None]:
#| title: Distance matrix of randomly sampled reads (amino acid sequences)

# Heatmap built from the dmat_aa_random input path, then displayed explicitly
# through the returned object's show() method.
fig = make_distance_heatmap(dmat_aa_random)
fig.show()