In [2]:
import anndata as ad
import pandas as pd
import scanpy as sc

from scprocessing.Pipeline import Pipeline
from scprocessing.QC import QC
from scprocessing.Normalization import Normalization
from scprocessing.Integration import Integration
from scprocessing.metrics import jaccard, silhouette, davies, calinski, evaluate
from scprocessing.SelectPipeline import SelectPipeline

In [3]:
# Enable IPython's autoreload extension in mode 2 so edits to imported modules
# are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Reading Datasets

In [9]:
# NOTE(review): `epithelial` is not loaded anywhere in the visible notebook; it
# must already exist in the kernel — confirm and add the corresponding read step.
epithelial.var_names_make_unique()
epithelial.obs_names_make_unique()
# Tag every cell with its compartment, stored as a categorical column.
epithelial.obs["Type"] = "Epithelial"
epithelial.obs["Type"] = epithelial.obs["Type"].astype("category")
# Guarded so the cell can be re-run after the embedding has already been removed.
if "X_diffmap" in epithelial.obsm:
    del epithelial.obsm["X_diffmap"]

In [5]:
# Load the immune dataset and make gene / cell identifiers unique.
immune = sc.read_h5ad("/mnt/nationwide1/immune.h5ad")
immune.var_names_make_unique()
immune.obs_names_make_unique()
# Tag every cell with its compartment, stored as a categorical column.
immune.obs["Type"] = "Immune"
immune.obs["Type"] = immune.obs["Type"].astype("category")
# Drop the diffusion-map embedding if the file carries one; guarded so a file
# without it does not abort the cell with a KeyError.
if "X_diffmap" in immune.obsm:
    del immune.obsm["X_diffmap"]

In [4]:
from typing import List
from anndata import AnnData

def splitAD(dataset: AnnData, key: str) -> List[AnnData]:
    """
    Split an AnnData object into one independent copy per distinct value of an
    observation column.

    Parameters:
        dataset: AnnData object
        key: observation field (column of ``dataset.obs``) to split the dataset by
    Return:
        List of AnnData copies, one per distinct non-missing value of
        ``dataset.obs[key]``, in order of first appearance. Observations whose
        value is missing are not included in any subset.
    Raises:
        KeyError: if ``key`` is not a column of ``dataset.obs``.
    """
    labels = dataset.obs[key]
    # Missing values never compare equal to anything (including themselves), so
    # keeping them among the groups would only produce an empty subset.
    groups = labels.dropna().unique()
    return [dataset[labels == value].copy() for value in groups]

# Model Selection

In [8]:
import numpy as np

# Split the immune dataset by tissue condition and keep the first five subsets.
immune_data = splitAD(immune, "tissue_condition")[:5]

# Candidate methods, metrics and clustering resolution handed to SelectPipeline.
select_immune = SelectPipeline(
    normalization=["seurat", "zheng17"],
    integration=["harmony", "scanorama"],
    metrics=["jaccard", "ari", "nmi"],
    resolution_range=[0.3],
    key="tissue_condition",
)

In [None]:
# Run the search on the immune subsets with "ari" as the key metric; the call
# returns three values, unpacked here.
immune_res, report_immune, pipeline_immune = select_immune.search(
    immune_data,
    key_metric="ari",
)

In [125]:
# Collect the UMAP embedding of every entry in `select_immune.clusters` into one
# long-format table, labelled with the method pair that produced it.
# NOTE(review): keys of `select_immune.clusters` are assumed to be
# (normalization, integration) pairs — confirm against SelectPipeline.
umap_frames = [
    pd.DataFrame(cluster.obsm["X_umap"], columns=["UMAP1", "UMAP2"]).assign(
        tissue_condition=cluster.obs["tissue_condition"].tolist(),
        Normalization=key[0],
        Integration=key[1],
    )
    for key, cluster in select_immune.clusters.items()
]
# The list and the concatenated frame get separate names, so `final_df` is
# only ever a DataFrame.
final_df = pd.concat(umap_frames)

In [211]:
# Filter final_df down to the rows that will be plotted: drop the "merge"
# integration rows, then down-sample each tissue condition.
N_CELLS_PER_CONDITION = 3000  # upper bound on rows kept per tissue condition
SAMPLE_SEED = 0  # fixed seed so the sampled rows (and the figure) are reproducible

integrated_df = final_df[final_df["Integration"] != "merge"]
filtered_df = pd.concat(
    [
        # Cap the sample size: asking for more rows than a condition has
        # would raise ValueError.
        condition_df.sample(
            n=min(N_CELLS_PER_CONDITION, len(condition_df)),
            random_state=SAMPLE_SEED,
        )
        for _, condition_df in integrated_df.groupby(
            "tissue_condition", sort=False, observed=True
        )
    ]
)
filtered_df

Unnamed: 0,UMAP1,UMAP2,tissue_condition,Normalization,Integration
1388,5.393610,7.610217,Mammoplasty WT,seurat,harmony
3093,21.422897,2.280605,Mammoplasty WT,seurat,scanorama
4522,-4.255817,8.805834,Mammoplasty WT,seurat,harmony
6909,1.777498,-2.791402,Mammoplasty WT,seurat,harmony
4222,4.890188,-2.768391,Mammoplasty WT,seurat,scanorama
...,...,...,...,...,...
22765,11.401667,-2.490550,Mastectomy unknown,zheng17,scanorama
22851,4.052268,7.888750,Mastectomy unknown,seurat,scanorama
21791,1.477207,-10.788793,Mastectomy unknown,seurat,scanorama
22184,9.361031,6.385869,Mastectomy unknown,zheng17,scanorama


In [248]:
import altair as alt
import pandas as pd

# Switch Altair to the "vegafusion" data transformer before building the chart.
alt.data_transformers.enable("vegafusion")

# Five-colour palette applied to the tissue_condition colour scale.
high_contrast_colors = ['#e6194b', '#3cb44b', '#ffe119', '#4363d8', '#f58231']

# Base scatter: one small point per sampled cell in UMAP space.
umap_chart = (
    alt.Chart(filtered_df)
    .mark_point(size=1.5)
    .encode(
        x="UMAP1",
        y="UMAP2",
        color=alt.Color("tissue_condition").scale(range=high_contrast_colors),
    )
)

# Facet by method pair (rows: normalization, columns: integration) and apply
# the font / axis styling in a single chain.
facet_grid = (
    umap_chart.facet(
        row="Normalization",
        column="Integration",
        title='UMAP plots for Different Combinations of Normalization and Integration Methods',
    )
    .configure_axis(grid=False, labelFontSize=18, titleFontSize=18, tickSize=2)
    .configure_header(labelFontSize=18, titleFontSize=18)
    .configure_headerRow(labelFontSize=18, titleFontSize=18)
    .configure_headerColumn(labelFontSize=18, titleFontSize=18)
    .configure_legend(titleFontSize=18, labelFontSize=16)
)

# Write the figure to disk at 5x scale.
facet_grid.save("hbca.png", scale_factor=5.0)