In [18]:
import scanpy as sc
import anndata as ad
from scprocessing.Pipeline import Pipeline
from scprocessing.QC import QC
from scprocessing.Normalization import Normalization
from scprocessing.Integration import Integration
from scprocessing.metrics import jaccard, silhouette, davies, calinski, evaluate
from scprocessing.SelectPipeline import SelectPipeline
from scprocessing.visualization import visualize_report
from scprocessing.utils import splitAD, read_single_cell_data
import numpy as np
import pandas as pd
from typing import List
from anndata import AnnData

In [2]:
# Enable IPython's autoreload extension (mode 2) so edits to imported project
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Reading Datasets

This notebook uses the immune dataset from the following CELLxGENE collection: https://cellxgene.cziscience.com/collections/48259aa8-f168-4bf5-b797-af8e88da6637

In [19]:
# Location of the immune .h5ad file; kept in one named constant so the path
# only needs to be edited here when the notebook runs on another machine.
IMMUNE_H5AD_PATH = "/mnt/shared/nationwide/cell_type_datasets/human_brca_immune.h5ad"

immune = read_single_cell_data(IMMUNE_H5AD_PATH)

# Make variable and observation names unique before any downstream processing.
immune.var_names_make_unique()
immune.obs_names_make_unique()

# Tag every observation with a constant "Type" label stored as a categorical.
immune.obs["Type"] = "Immune"
immune.obs["Type"] = immune.obs["Type"].astype("category")

# Dataset-specific: this file ships an "X_diffmap" entry in .obsm that is
# removed before the pipeline runs (the original author did not record why).
# The membership check keeps the cell from raising KeyError on files that do
# not carry this entry.
if "X_diffmap" in immune.obsm:
    del immune.obsm["X_diffmap"]

# Model Selection

In [21]:
# Split the AnnData object using the "tissue_condition" key
# (see splitAD for the exact shape of the result).
immune_data = splitAD(immune, "tissue_condition")

# Configure the pipeline search: the candidate normalization and integration
# methods, the metrics to compute, the clustering resolution(s), and the same
# "tissue_condition" key used for the split above.
select_immune = SelectPipeline(
    normalization=["seurat", "zheng17"],
    integration=["harmony", "scanorama"],
    metrics=["jaccard", "ari", "nmi"],
    resolution_range=[0.3],
    key="tissue_condition",
)

In [None]:
# Run the configured search on the split data; the call returns three values.
# NOTE(review): presumably "ari" is the metric used to rank candidates --
# confirm against SelectPipeline.search.
(immune_res, report_immune, pipeline_immune) = select_immune.search(
    immune_data,
    key_metric="ari",
)

In [None]:
# Visualize the report and results returned by the search cell above.
visualize_report(report_immune, immune_res)

# Prettier Visual

In [7]:
# Build one long table of UMAP coordinates across every entry in
# select_immune.clusters. Each key is indexed as [0] -> normalization and
# [1] -> integration; each value provides .obsm["X_umap"] and
# .obs.tissue_condition.
umap_frames = [
    pd.DataFrame(adata.obsm["X_umap"], columns=["UMAP1", "UMAP2"]).assign(
        tissue_condition=adata.obs.tissue_condition.tolist(),
        Normalization=combo[0],
        Integration=combo[1],
    )
    for combo, adata in select_immune.clusters.items()
]

# Stack the per-combination frames; each frame's own row index is kept as-is.
final_df = pd.concat(umap_frames)

In [None]:
# Drop rows whose Integration is "merge", then regroup the remaining rows so
# that all cells of one tissue condition are contiguous, with conditions kept
# in order of first appearance.
non_merge_df = final_df[final_df["Integration"] != "merge"]
filtered_df = pd.concat(
    [
        non_merge_df[non_merge_df["tissue_condition"] == tissue]
        for tissue in non_merge_df["tissue_condition"].unique()
    ]
)
filtered_df

In [9]:
import pandas as pd
import altair as alt

# Switch Altair to the "vegafusion" data transformer.
alt.data_transformers.enable("vegafusion")

# Colour range applied to the tissue_condition colour scale.
high_contrast_colors = ["#e6194b", "#3cb44b", "#ffe119", "#4363d8", "#f58231"]

# Font sizes shared by the axis and header configuration below.
label_fonts = {"labelFontSize": 18, "titleFontSize": 18}

# Base scatter plot: one small point per row, coloured by tissue condition.
umap_chart = (
    alt.Chart(filtered_df)
    .mark_point(size=1.5)
    .encode(
        x="UMAP1",
        y="UMAP2",
        color=alt.Color("tissue_condition").scale(range=high_contrast_colors),
    )
)

# Facet into a Normalization (rows) x Integration (columns) grid, then apply
# all cosmetic configuration in a single chain.
facet_grid = (
    umap_chart.facet(
        row="Normalization",
        column="Integration",
        title="UMAP plots for Different Combinations of Normalization and Integration Methods",
    )
    .configure_axis(grid=False, tickSize=2, **label_fonts)
    .configure_header(**label_fonts)
    .configure_headerRow(**label_fonts)
    .configure_headerColumn(**label_fonts)
    .configure_legend(titleFontSize=18, labelFontSize=16)
)

# Write the figure to disk and show it as the cell output.
facet_grid.save("hbca.png", scale_factor=5.0)
facet_grid