## Planet Plot Demo

In [None]:
import sctoolbox.utilities as utils
import sctoolbox.plotting.planet_plot as pp

#### load adata

In [None]:
# Load the dataset from an .h5ad file with the sctoolbox loader.
# NOTE: this is an absolute path from the original author's environment;
# point it to your own file before running the notebook.
adata = utils.load_h5ad("/mnt/agnerds/user/aviral.jain/all-snRNA.h5ad")

In [None]:
# List the annotation keys available in adata.obs.
# obs_keys is a method, so it has to be called; the bare attribute would only
# display the bound-method repr instead of the keys.
adata.obs_keys()

In [None]:
# Display the variable annotation table of the loaded object
# (presumably the place where the 'feature_name' gene symbols live — confirm).
adata.var

In [None]:
# Marker gene panels, one list per tissue / compartment.
# The plotting cells of this demo use `myocardium_marker_genes`.
fibroblast_marker_genes = ["COL1A1", "COL3A1", "TGFB1", "MMP2", "ACTA2"]
vascular_endothelium_marker_genes = ["NOS3", "VCAM1", "ICAM1", "SELE", "KLF2"]
myocardium_marker_genes = ["TNF", "NPPB", "NPPA", "MYH7", "ACTC1"]
blood_marker_genes = ["IL6", "CRP", "MMP9", "TNF", "CXCL8"]
immune_marker_genes = ["IL1B", "TNF", "CCL2", "CD68", "TGFBR2"]

In [None]:
# Grouping columns handed to the preprocess / render calls as x and y axis.
x_col, y_col = "disease", "cell_type_original"

# Human-readable axis labels and the default expression threshold.
x_label, y_label = "disease", "cell type"
expression_threshold = 0

# Color map name passed to the render calls as `color_schema`.
color_schema = "viridis"

In [None]:
# Build the plotting table for the myocardium marker genes, grouped by
# x_col / y_col. Gene names are matched through the 'feature_name' symbols.
plot_vars = pp.planet_plot_anndata_preprocess(
    adata,
    x_col,
    y_col,
    genes=myocardium_marker_genes,
    gene_symbols="feature_name",
    layer_value_aggregator="mean",
    gene_count_aggregator="median",
    gene_expression_aggregator="median",
)

In [None]:
# Display the preprocessed result that is handed to planet_plot_render below.
plot_vars

#### Dotplot like plot
Here you only see the percentage of cells in each cluster that express the genes.

In [None]:
# Dotplot-like view: aggregate mode with the percentage as dot size.
pp.planet_plot_render(
    plot_vars,
    x_col,
    y_col,
    mode="aggregate",
    size_value="percentage",
    color_value="value",
    use_log_scale=False,
    planet_columns=myocardium_marker_genes,
    color_schema=color_schema,
)

#### Count as size_value
Here you also see the count of cells in each cluster. Note that different clusters have different counts.

In [None]:
# Aggregate mode again, this time with the cell count as dot size.
pp.planet_plot_render(
    plot_vars,
    x_col,
    y_col,
    mode="aggregate",
    size_value="count",
    color_value="value",
    use_log_scale=False,
    planet_columns=myocardium_marker_genes,
    color_schema=color_schema,
)

#### Planet mode
In order to see the individual contributions of the genes to the aggregate, we use the planet mode.

In [None]:
# Planet mode: each marker gene is drawn as its own planet around the
# aggregate dot.
pp.planet_plot_render(
    plot_vars,
    x_col,
    y_col,
    mode="planet",
    size_value="count",
    color_value="value",
    use_log_scale=False,
    planet_columns=myocardium_marker_genes,
    color_schema=color_schema,
)

#### percentage_max as color_value
The gene expressions in the last plot have become similarly coloured; this is because including the individual gene expressions has greatly increased the range of expression values. To allow a better comparative analysis across the clusters for different genes, we set the color value to percentage_max, where the expression value for each dot (center or planet) is calculated as a percentage of the maximum expression of that dot across all the clusters.

In [None]:
# Planet mode, colored by percentage_max instead of the raw value.
pp.planet_plot_render(
    plot_vars,
    x_col,
    y_col,
    mode="planet",
    size_value="count",
    color_value="percentage_max",
    use_log_scale=False,
    planet_columns=myocardium_marker_genes,
    color_schema=color_schema,
)

#### use_log_scale
As some clusters are very small compared to others, it is hard to estimate the difference in count across different clusters. We can use a log scale to shift the distribution a bit to get a better view, which may also help to filter out insignificant values.

In [None]:
# Same plot as before, but with the log scale switched on.
pp.planet_plot_render(
    plot_vars,
    x_col,
    y_col,
    mode="planet",
    size_value="count",
    color_value="percentage_max",
    use_log_scale=True,
    planet_columns=myocardium_marker_genes,
    color_schema=color_schema,
)

#### Plot obs_columns
However, filtering cannot be done solely on the basis of cell count; we have many scoring metrics stored in the obs columns. The planet plot also has an advanced preprocessing function to which you can pass obs columns to get their aggregate values and plot them as planets.

In [None]:
# Prepare the df including the obs columns (advanced preprocessing).
plot_vars_2 = pp.planet_plot_anndata_preprocess_advanced(
    adata,
    x_col,
    y_col,
    genes=myocardium_marker_genes,
    obs_columns=["percent_mito", "doublet_score", "dissociation_score"],
    gene_symbols="feature_name",
    layer_value_aggregator="mean",
    gene_count_aggregator="median",
    gene_expression_aggregator="median",
)

In [None]:
# Plot the obs columns as planets, each with its own color schema.
pp.planet_plot_render(
    plot_vars_2,
    x_col,
    y_col,
    mode="planet",
    size_value="count",
    color_value="value",
    use_log_scale=False,
    planet_columns=["percent_mito", "doublet_score", "dissociation_score"],
    color_schema=color_schema,
    planet_color_schemas=["coolwarm", "cividis", "gray"],
)

#### obs columns with individual thresholds
Of course, we do not need to apply the default threshold used for gene expression to the obs columns as well. We can pass a list of custom thresholds, one for each obs column.

In [None]:
# Prepare the df with one threshold per obs column
# (obs_thresholds is given in the same order as obs_columns).
plot_vars_3 = pp.planet_plot_anndata_preprocess_advanced(
    adata,
    x_col,
    y_col,
    genes=myocardium_marker_genes,
    obs_columns=["percent_mito", "doublet_score", "dissociation_score"],
    obs_thresholds=[0.1, 0, 0],
    gene_symbols="feature_name",
    layer_value_aggregator="mean",
    gene_count_aggregator="median",
    gene_expression_aggregator="median",
)

In [None]:
# Plot the obs columns computed with the individual thresholds.
pp.planet_plot_render(
    plot_vars_3,
    x_col,
    y_col,
    mode="planet",
    size_value="count",
    color_value="value",
    use_log_scale=False,
    planet_columns=["percent_mito", "doublet_score", "dissociation_score"],
    color_schema=color_schema,
    planet_color_schemas=["coolwarm", "cividis", "gray"],
)

#### Switching percent_mito aggregator to 'median'
We can also pass custom aggregators corresponding to the obs columns. Upon switching to median, the range of percent_mito is reduced and as a result it is easier to filter out the good quality clusters. We also modify the colorbar labels to show that the median is now displayed. There is a range of arguments to customize all the labels in the plot.

In [None]:
# Switch the obs column aggregators to 'median'
# (one aggregator per entry of obs_columns).
plot_vars_4 = pp.planet_plot_anndata_preprocess_advanced(
    adata,
    x_col,
    y_col,
    genes=myocardium_marker_genes,
    obs_columns=["percent_mito", "doublet_score", "dissociation_score"],
    obs_aggregator_array=["median", "median", "median"],
    obs_thresholds=[0.1, 0, 0],
    gene_symbols="feature_name",
    layer_value_aggregator="mean",
    gene_count_aggregator="median",
    gene_expression_aggregator="median",
)

In [None]:
# Plot with the new aggregator: the range is reduced, so the colors allow
# better decisions. The colorbar labels state that the median is shown.
pp.planet_plot_render(
    plot_vars_4,
    x_col,
    y_col,
    mode="planet",
    size_value="count",
    color_value="value",
    use_log_scale=False,
    planet_columns=["percent_mito", "doublet_score", "dissociation_score"],
    color_schema=color_schema,
    planet_color_schemas=["coolwarm", "cividis", "gray"],
    colorbar_label_array=[
        "percent mito (median)",
        "doublet score (median)",
        "dissociation score (median)",
    ],
)

#### Column subsetting
Now, we pass the filtered clusters in y_col_subset, to only use this subset for our plotting.

In [None]:
# Subset the y axis to the clusters selected according to percent mito.
# NOTE(review): the y_col_subset labels (e.g. 'Cardiomyocite') presumably have
# to match the labels in the y_col column exactly — confirm the spelling
# against the data.
plot_vars_5 = pp.planet_plot_anndata_preprocess_advanced(
    adata,
    x_col,
    y_col,
    y_col_subset=["Cardiomyocite", "Cycling cells", "Adipocyte", "Fibroblast"],
    genes=myocardium_marker_genes,
    obs_columns=["percent_mito", "doublet_score", "dissociation_score"],
    obs_thresholds=[0.1, 0, 0],
    obs_aggregator_array=["median", "median", "median"],
    gene_symbols="feature_name",
    layer_value_aggregator="mean",
    gene_count_aggregator="median",
    gene_expression_aggregator="median",
)

In [None]:
# Plot the subset for a better comparison.
pp.planet_plot_render(
    plot_vars_5,
    x_col,
    y_col,
    mode="planet",
    size_value="count",
    color_value="value",
    use_log_scale=False,
    planet_columns=["percent_mito", "doublet_score", "dissociation_score"],
    color_schema=color_schema,
    planet_color_schemas=["coolwarm", "cividis", "gray"],
    colorbar_label_array=[
        "percent mito (median)",
        "doublet score (median)",
        "dissociation score (median)",
    ],
)

In [None]:
# Reuse the same df for the gene expression plot.
pp.planet_plot_render(
    plot_vars_5,
    x_col,
    y_col,
    mode="planet",
    size_value="count",
    color_value="percentage_max",
    use_log_scale=False,
    planet_columns=myocardium_marker_genes,
    color_schema=color_schema,
)

#### Figure size adjustment
The two parameters FIG_SIZE_SCALER and PLANET_DIST_SCALER can be tuned to adjust the figure size and planet distance in order to achieve a perfect-looking plot.

In [None]:
# Save space by shrinking the plot: FIG_SIZE_SCALER=1.5 instead of 2 (default).
pp.planet_plot_render(
    plot_vars_5,
    x_col,
    y_col,
    mode="planet",
    size_value="count",
    color_value="percentage_max",
    use_log_scale=False,
    planet_columns=myocardium_marker_genes,
    color_schema=color_schema,
    FIG_SIZE_SCALER=1.5,
)

In [None]:
# Planets messed up? No problem! Adjust the PLANET_DIST_SCALER.
pp.planet_plot_render(
    plot_vars_5,
    x_col,
    y_col,
    mode="planet",
    size_value="count",
    color_value="percentage_max",
    use_log_scale=False,
    planet_columns=myocardium_marker_genes,
    color_schema=color_schema,
    FIG_SIZE_SCALER=1.5,
    PLANET_DIST_SCALER=2.35,
)

#### Legend 
There is a great range of parameters to tune size and alignment of the legends individually.

In [None]:
# Adjust the size of the color, dot and planet legends individually.
pp.planet_plot_render(
    plot_vars_5,
    x_col,
    y_col,
    mode="planet",
    size_value="count",
    color_value="percentage_max",
    use_log_scale=False,
    planet_columns=myocardium_marker_genes,
    color_schema=color_schema,
    FIG_SIZE_SCALER=1.5,
    PLANET_DIST_SCALER=2.35,
    LEGEND_COLOR_WIDTH=1.5,
    LEGEND_DOT_WIDTH=1.5,
    LEGEND_DOT_HEIGHT=1.5,
    LEGEND_PLANET_WIDTH=1.5,
    LEGEND_PLANET_HEIGHT=1.5,
)

In [None]:
# Make the figure even more compact by also adjusting the legend alignments.
pp.planet_plot_render(
    plot_vars_5,
    x_col,
    y_col,
    mode="planet",
    size_value="count",
    color_value="percentage_max",
    use_log_scale=False,
    planet_columns=myocardium_marker_genes,
    color_schema=color_schema,
    FIG_SIZE_SCALER=1.5,
    PLANET_DIST_SCALER=2.35,
    # legend sizes
    LEGEND_COLOR_WIDTH=1.5,
    LEGEND_DOT_WIDTH=1.5,
    LEGEND_DOT_HEIGHT=1.5,
    LEGEND_PLANET_WIDTH=1.5,
    LEGEND_PLANET_HEIGHT=1.5,
    # legend alignments
    LEGEND_COLOR_X_ALIGNMENT=-2,
    LEGEND_COLOR_Y_ALIGNMENT=3,
    LEGEND_DOT_Y_ALIGNMENT=-1.5,
    LEGEND_PLANET_Y_ALIGNMENT=-1.5,
)

### For more features read the documentation!