# <b>spacemake</b> - Saturation Analysis

In [None]:
import spacemake as smk
from spacemake.config import ConfigFile
from spacemake.project_df import ProjectDF
from spacemake.report import utils as report_utils

import datetime
from functools import partial

In [None]:
# Notebook parameters. The values below are placeholders.
# NOTE(review): presumably overridden per sample when the notebook is
# executed by the pipeline - confirm against the caller.

# Identifiers handed to the report helpers in the cells below
project_id = "project_id"
sample_id = "sample_id"
puck_barcode_file_id = "puck_barcode_file_id"

# Input files
config_yaml_path = "config.yaml"  # at the root spacemake folder
project_df_path = "project_df.csv"  # at the root spacemake folder

# Run modes to report on, and the downsampled DGE summary input that is
# passed to report_utils.load_dge_summary_downsampling
run_modes = []
downsampled_dge_summary = {}

# Downsampling percentages: 10, 20, ..., 100
DOWNSAMPLE_PCTS = [10 * step for step in range(1, 11)]
# Percentages selected for plotting: 20, 40, ..., 100
PCT_DOWNSAMPLE_TO_PLOT = [20 * step for step in range(1, 6)]

In [None]:
# Load the spacemake configuration and the project table from the paths
# given in the parameters cell.
config = ConfigFile.from_yaml(config_yaml_path)
project_df = ProjectDF(project_df_path, config=config)

# Normalise `run_modes` BEFORE it is iterated: a single run mode passed as a
# bare string would otherwise be walked character by character, and
# `config.get_run_mode` would be called with single letters.
if isinstance(run_modes, str):
    run_modes = [run_modes]

# 'umi_cutoff' variable of each requested run mode, keyed by run mode name.
umis_per_run_mode = {
    run_mode: config.get_run_mode(run_mode).variables['umi_cutoff']
    for run_mode in run_modes
}

## Overview

### Run information

In [None]:
# Sample information table for the project / sample / puck given in the
# parameters cell; it is rendered in the "Sample Information" cell below.
sample_info_df = report_utils.create_sample_info_df(
    project_df,
    project_id,
    sample_id,
    puck_barcode_file_id,
)

# Downsampled DGE summaries, keyed by run mode. Each entry is whatever
# report_utils.load_dge_summary_downsampling returns for that run mode
# (the previous empty-dict placeholder was overwritten immediately, so it
# has been dropped).
dge_summaries = {}
for run_mode in run_modes:
    dge_summaries[run_mode] = report_utils.load_dge_summary_downsampling(
        downsampled_dge_summary, run_mode, DOWNSAMPLE_PCTS, puck_barcode_file_id
    )

In [None]:
visualizer = smk.pl.TabVisualizer()

# Sample Information table: the sample info frame is transposed and its
# former column labels are moved into a 'Variable' column.
sample_info_long = sample_info_df.T.reset_index(names='Variable')
sample_info_table = smk.pl.DataFrameTable(
    title="Sample Information",
    description="",
    data=sample_info_long,
)

sample_info_group = smk.pl.PlotGroup(
    name="Sample Information",
    description="QC tables for the sample",
    plots=[sample_info_table],
)
visualizer.add_plot_group(sample_info_group)

In [None]:
# Render the sample-information visualizer to HTML and show it as the
# output of this cell.
display(visualizer.generate_html())

## Downstream stats

In order to know whether we would gain more from sequencing deeper, we downsampled the data (the final.bam file) to contain 10%, 20%, …, 90% of the reads, and then we created the DigitalExpression matrix (as in the normal dropseq pipeline).

This gives us insight into whether we have reached the saturation point (in terms of median UMIs per cell and median genes per cell) or whether we should sequence deeper.

Results of this are plotted below.

### Histograms per run mode

In [None]:
visualizer = smk.pl.TabVisualizer()

# One entry per density panel:
# (metric name, key into smk.pl.metrics_colors, panel description, plot title)
density_panels = (
    ("total_counts", "umis", "# of UMIs\nper spatial unit", "# of UMIs\nper spatial unit"),
    ("n_reads", "reads", "# of reads\nper spatial unit", "# of reads\nper spatial unit"),
    ("reads_per_counts", "pcr", "median reads/UMI\nper spatial unit", "reads/UMI\nper spatial unit"),
)

# One plot group per run mode, each holding the three density panels.
for run_mode, dge_summary in dge_summaries.items():
    plots = []
    for metric, color_key, panel_description, plot_title in density_panels:
        # partial() binds dge_summary and the panel settings now, so each
        # Plot keeps its own arguments until plot_func is eventually called.
        draw_density = partial(
            smk.pl.density_per_downsampling,
            dge_summary,
            metric,
            log_scale=True,
            color=smk.pl.metrics_colors[color_key],
            title=plot_title,
        )
        plots.append(
            smk.pl.Plot(
                title=metric,
                description=panel_description,
                plot_func=draw_density,
            )
        )

    visualizer.add_plot_group(
        smk.pl.PlotGroup(
            name=f"Run Mode: {run_mode}",
            description=f"Analysis results for {run_mode}",
            plots=plots,
        )
    )

In [None]:
# Render the per-run-mode histogram visualizer to HTML and show it as the
# output of this cell.
display(visualizer.generate_html())

### Median plots per run mode

In [None]:
visualizer = smk.pl.TabVisualizer()

# One entry per median panel:
# (metric name, key into smk.pl.metrics_colors, panel description, plot title)
median_panels = (
    ("total_counts", "umis", "# of UMIs\nper spatial unit", "# of UMIs\nper spatial unit"),
    ("n_reads", "reads", "# of reads\nper spatial unit", "# of reads\nper spatial unit"),
    ("reads_per_counts", "pcr", "median reads/UMI\nper spatial unit", "reads/UMI\nper spatial unit"),
)

# One plot group per run mode, each holding the three median panels.
for run_mode, dge_summary in dge_summaries.items():
    # 'umi_cutoff' value recorded for this run mode when the config was loaded
    umi_cutoffs = umis_per_run_mode[run_mode]

    plots = []
    for metric, color_key, panel_description, plot_title in median_panels:
        # partial() binds the summary, metric and cutoffs now, so each Plot
        # keeps its own arguments until plot_func is eventually called.
        draw_median = partial(
            smk.pl.median_per_downsampling,
            dge_summary,
            metric,
            umi_cutoffs,
            color=smk.pl.metrics_colors[color_key],
            title=plot_title,
        )
        plots.append(
            smk.pl.Plot(
                title=metric,
                description=panel_description,
                plot_func=draw_median,
            )
        )

    visualizer.add_plot_group(
        smk.pl.PlotGroup(
            name=f"Run Mode: {run_mode}",
            description=f"Analysis results for {run_mode}",
            plots=plots,
        )
    )

In [None]:
# Render the per-run-mode median visualizer to HTML and show it as the
# output of this cell.
display(visualizer.generate_html())

### Deciled median plots per run mode

In [None]:
visualizer = smk.pl.TabVisualizer()

# One plot group per run mode, each holding a single deciled-median plot.
# (The unused `deciledmedian` placeholder dict has been removed: it was
# reassigned on every iteration and never read.)
for run_mode, dge_summary in dge_summaries.items():
    # Deciled data derived from this run mode's downsampled summary; the
    # result is handed unchanged to smk.pl.deciled_median.
    decile_dat = report_utils.generate_deciled_data(dge_summary)

    plots = [
        smk.pl.Plot(
            title='Deciled median',
            description="Deciled median",
            # partial() binds decile_dat now, so the plot keeps this run
            # mode's data until plot_func is eventually called.
            plot_func=partial(smk.pl.deciled_median, decile_dat)
        )
    ]

    group = smk.pl.PlotGroup(
        name=f"Run Mode: {run_mode}",
        description=f"Analysis results for {run_mode}",
        plots=plots
    )

    visualizer.add_plot_group(group)

In [None]:
# Render the deciled-median visualizer to HTML and show it as the output
# of this cell.
display(visualizer.generate_html())