In [1]:
import warnings
# Suppress every warning for the rest of the session to keep cell output compact.
# NOTE(review): this also hides deprecation and numerical warnings raised by later cells.
warnings.filterwarnings("ignore")

In [2]:
import squidpy as sq
import sccellfie
import scanpy as sc
import pandas as pd
import numpy as np
import networkx as nx

import matplotlib.pyplot as plt
import seaborn as sns
import glasbey
import h5py
import math
import requests

import textwrap

from pathlib import Path

In [3]:
# Absolute path of the current working directory (where the notebook is executed).
base_dir = Path().resolve()
# Its parent directory is used below as the root for locating the input data.
parent_dir = base_dir.parent
parent_dir

PosixPath('/home/sadegh/projects/nanostring-cosmx-spatial-omics-modeling')

### Load Datasets

In [None]:
# Read the AnnData file `filtered_normalized_data.h5ad` located under <parent_dir>/data/h5ad.
h5ad_path = parent_dir / 'data' / 'h5ad' / 'filtered_normalized_data.h5ad'
adata = sc.read(filename=h5ad_path)
adata

AnnData object with n_obs × n_vars = 174747 × 1011
    obs: 'nCount_Nanostring', 'nFeature_Nanostring', 'cell_ID', 'fov', 'Area', 'AspectRatio', 'Width', 'Height', 'Mean.PanCK', 'Max.PanCK', 'Mean.CD68', 'Max.CD68', 'Mean.CD298_B2M', 'Max.CD298_B2M', 'Mean.CD45', 'Max.CD45', 'Mean.DAPI', 'Max.DAPI', 'id', 'TMA', 'Subject_ID', 'Treatment_Status', 'Run_Tissue_name', 'log10totalcounts', 'cell_type', 'tissue', 'sample', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'pct_counts_in_top_50_genes', 'pct_counts_in_top_100_genes', 'pct_counts_in_top_200_genes', 'pct_counts_in_top_500_genes', 'total_counts_mt', 'log1p_total_counts_mt', 'pct_counts_mt'
    var: 'gene', 'mt', 'n_cells_by_counts', 'mean_counts', 'log1p_mean_counts', 'pct_dropout_by_counts', 'total_counts', 'log1p_total_counts', 'n_cells'
    obsm: 'spatial'

In [5]:
# Use the gene symbols stored in `adata.var["gene"]` as the variable names.
# A plain NumPy array is assigned (rather than the Series) so the new index does
# not inherit the name "gene" and end up sharing its name with the existing column.
# NOTE(review): AnnData's h5ad writer is believed to reject an index whose name
# matches a column holding different values (possible once duplicates get
# suffixed below) — confirm against the installed anndata version.
adata.var_names = adata.var["gene"].astype(str).to_numpy()
# De-duplicate repeated gene names so every variable name is unique.
adata.var_names_make_unique()

### Apply scCellFie Pipeline

In [None]:
# Keyword arguments for the scCellFie pipeline, gathered in one place for easy tuning.
pipeline_kwargs = dict(
    organism='human',
    sccellfie_data_folder=None,
    n_counts_col='nCount_Nanostring',
    process_by_group=False,
    groupby=None,                 # e.g. 'Treatment_Status', or None
    neighbors_key='neighbors',
    n_neighbors=10,
    batch_key='sample',
    threshold_key='sccellfie_threshold',
    smooth_cells=True,
    alpha=0.33,
    chunk_size=5000,
    disable_pbar=True,
    save_folder=None,             # e.g. 'result', or None
    save_filename=None,           # e.g. 'sccellfie_results', or None
)

# Run the pipeline on the loaded AnnData. The returned object is indexed in later
# cells with the keys 'adata' and 'task_info'.
results = sccellfie.run_sccellfie_pipeline(adata, **pipeline_kwargs)

## Understanding the scCellFie Results

In [None]:
# Display the object returned by the pipeline (accessed below via results['adata'] and results['task_info']).
results

In [None]:
# Metabolic-task level object attached to the result AnnData; reused by the plotting
# and aggregation cells below.
metabolic_data = results['adata'].metabolic_tasks
metabolic_data

In [None]:
# Reaction level object attached to the result AnnData; used by the reactions dot plot below.
reaction_data = results['adata'].reactions
reaction_data

## Save Gene, Reactions and Metabolic Tasks as CSV

In [None]:
# Gene-level matrix as a DataFrame (rows = observations, columns = variables).
# `AnnData.to_df()` labels rows with the obs names and columns with the var names
# and also copes with a sparse `.X`, which the plain `pd.DataFrame` constructor
# is not designed for.
# For reactions or metabolic tasks use results['adata'].reactions.to_df()
# or results['adata'].metabolic_tasks.to_df() instead.
df = results['adata'].to_df()
df

In [None]:
# df.to_csv('scCellFie_genes_with_name.csv')

In [None]:
# Persist the result AnnData (single-cell/spatial predictions) to disk in H5AD
# format, under the relative folder 'output/'.
sccellfie.io.save_adata(
    adata=results['adata'],
    output_directory='output/',
    filename='sccellfie_results',
)

## Visualization of scCellFie Results

In [None]:
# Name of the `.obs` column used to group cells in the report, plots and aggregation below.
gp = 'Subject_ID'

### Cell group level for the Metabolic Task Visualizer

In [None]:
# Build a group-level summary of the metabolic-task results (grouped by `gp`)
# for use with the Metabolic Task Visualizer.
report = sccellfie.reports.generate_report_from_adata(
    results['adata'].metabolic_tasks,
    group_by=gp,
    feature_name='metabolic_task',
)

In [None]:
# Write the contents of `report` to the relative folder 'report/'.
sccellfie.io.save_result_summary(
    results_dict=report,
    output_directory='report/',
)

In [None]:
# Metabolic tasks highlighted in the plots below.
# To use every available task instead, uncomment:
# metabolic_tasks = results['adata'].metabolic_tasks.var.index.tolist()
metabolic_tasks = [
    'ATP generation from glucose (hypoxic conditions) - glycolysis',
    'ATP regeneration from glucose (normoxic conditions) - glycolysis + krebs cycle',
    'Gluconeogenesis from Lactate',
    'Glutaminolysis (glutamine to lactate)',
    'Glucose to lactate conversion',
]
metabolic_tasks

In [None]:
# Categorical palette: at least 10 colours, or one per group when there are more.
n_groups = len(results['adata'].metabolic_tasks.obs[gp].unique())
palette = glasbey.extend_palette('Set2', palette_size=max(10, n_groups))

# Small panels with a compact font.
plt.rcParams.update({'figure.figsize': (3, 3), 'font.size': 10})

# One panel for the grouping variable followed by one per selected task;
# long titles are wrapped at 60 characters.
panel_keys = [gp, *metabolic_tasks]
panel_titles = ["\n".join(textwrap.wrap(key, width=60)) for key in panel_keys]

sc.pl.embedding(
    results['adata'].metabolic_tasks,
    basis='X_umap',
    color=panel_keys,
    title=panel_titles,
    ncols=1,
    wspace=0.7,
    frameon=False,
    palette=palette,
    cmap='OrRd',
)

In [None]:
# Violin plots of the selected metabolic tasks, one distribution per `gp` group,
# laid out in two columns and without the strip-plot overlay.
violin_kwargs = dict(
    features=metabolic_tasks,
    groupby=gp,
    stripplot=False,
    n_cols=2,
    ylabel='Metabolic Score',
)
fig, axes = sccellfie.plotting.create_multi_violin_plots(
    results['adata'].metabolic_tasks,
    **violin_kwargs,
)

In [None]:
# Stacked violin plot of the selected tasks per `gp` group, with swapped axes,
# each task standardised ('var') and no dendrogram.
ax = sc.pl.stacked_violin(
    results['adata'].metabolic_tasks,
    metabolic_tasks,
    groupby=gp,
    swap_axes=True,
    dendrogram=False,
    standard_scale='var',
)

### Dot Plot

In [None]:
# Dot plot over all genes of the result AnnData, grouped by `gp`.
gene_adata = results['adata']
sc.pl.dotplot(
    gene_adata,
    var_names=gene_adata.var.index,
    groupby=gp,
    swap_axes=True,
    standard_scale='var',
)

In [None]:
# Dot plot over all reactions, grouped by `gp`.
reaction_names = results['adata'].reactions.var.index
sc.pl.dotplot(
    reaction_data,
    var_names=reaction_names,
    groupby=gp,
    swap_axes=True,
    standard_scale='var',
)

In [None]:
# Dot plot restricted to the selected metabolic tasks, grouped by `gp`.
sc.pl.dotplot(
    metabolic_data,
    var_names=metabolic_tasks,
    groupby=gp,
    swap_axes=True,
    standard_scale='var',
)

### Heat Map 

In [None]:
# Cell-level heatmap of the selected tasks, cells grouped by `gp`, with a dendrogram.
heatmap_kwargs = dict(
    var_names=metabolic_tasks,
    groupby=gp,
    cmap="YlGnBu",
    swap_axes=True,
    dendrogram=True,
    figsize=(16, 4),
)
ax = sc.pl.heatmap(metabolic_data, **heatmap_kwargs)

### Aggregation 

In [None]:
# Number of non-zero entries in the metabolic-task data matrix.
count = np.count_nonzero(metabolic_data.X)
count

In [None]:
# Show which grouping column the aggregation below will use.
gp

In [None]:
# Aggregate the metabolic-task values per `gp` group using the mean.
# The result is transposed below so that tasks can be selected by row label.
agg = sccellfie.expression.aggregation.agg_expression_cells(metabolic_data, groupby=gp, agg_func='mean')

In [None]:
# Total number of non-zero entries in the aggregated table.
(agg != 0).sum().sum()


In [None]:
# Min-max normalise the transposed aggregate (tasks are used as row labels below).
# NOTE(review): axis=1 presumably rescales each row (task) across the groups — confirm.
input_df = sccellfie.preprocessing.matrix_utils.min_max_normalization(agg.T, axis=1)

In [None]:
# Heatmap of the scaled group-level values for the selected tasks
# (rows = tasks, columns = groups), with every tick label shown.
heatmap_fig, heatmap_ax = plt.subplots(figsize=(16, 4))
selected_scores = input_df.loc[metabolic_tasks, :]
g = sns.heatmap(
    selected_scores,
    cmap='YlGnBu',
    linewidths=0.5,
    xticklabels=1,
    yticklabels=1,
    ax=heatmap_ax,
)

# Label the colorbar; rotation=270 with extra padding keeps the text clear of the ticks.
cbar = g.collections[0].colorbar
cbar.set_label('Scaled metabolic activity', size=14, rotation=270, labelpad=25)

# Uncomment code below to save figure
# plt.savefig('./figures/Heatmap-Seaborn.pdf', dpi=300, bbox_inches='tight')

In [None]:
# Tracks plot of the selected tasks, cells grouped by `gp`, with a dendrogram.
ax = sc.pl.tracksplot(
    metabolic_data,
    var_names=metabolic_tasks,
    groupby=gp,
    dendrogram=True,
    figsize=(16, 4),
)

### Radial Plot

In [None]:
# Reshape the scaled table to long format: one row per (task, group) pair.
# The group labels go into a column named 'cell_type' and the values into
# 'scaled_trimean'; the task column is renamed from 'Task' to 'metabolic_task'.
df_melted = (
    input_df
    .reset_index()
    .melt(id_vars='Task', var_name='cell_type', value_name='scaled_trimean')
    .rename(columns={'Task': 'metabolic_task'})
)
df_melted.head()

In [None]:
# First four distinct group labels (in order of appearance) for the 2 x 2 radial plots.
ct = df_melted['cell_type'].unique()[:4]
ct

In [None]:
# Display the long-format table (pandas truncates the output for large frames).
df_melted

In [None]:
# Sanity-check the rows of the first group in `ct`: preview them and count the
# missing values per column.
# The group is chosen explicitly (ct[0]) instead of through a loop variable, so the
# cell runs on a fresh kernel; labels are compared as strings so that non-string
# group labels still match.
subset = df_melted[df_melted['cell_type'].astype(str) == str(ct[0])]
print(subset.head())
print(subset.isna().sum())

In [None]:
# 2 x 2 grid of polar axes: one radial plot per group in `ct` (at most four).
fig = plt.figure(figsize=(16, 16))
ax1 = fig.add_subplot(221, projection='polar')
ax2 = fig.add_subplot(222, projection='polar')
ax3 = fig.add_subplot(223, projection='polar')
ax4 = fig.add_subplot(224, projection='polar')
radial_axes = [ax1, ax2, ax3, ax4]

# Iterate over the group labels in `ct` (not over `gp`, which is only the
# column-name string and would be traversed character by character).
# The legend is drawn once, on the second panel.
for i, (cell, ax) in enumerate(zip(ct, radial_axes)):
    sccellfie.plotting.create_radial_plot(df_melted,
                                          results['task_info'],
                                          cell_type=str(cell),
                                          ax=ax,
                                          show_legend=i == 1,
                                          ylim=1.0)

# Remove any axes left empty when fewer than four groups are available.
for unused_ax in radial_axes[len(ct):]:
    fig.delaxes(unused_ax)

In [None]:
# One radial plot per group found in the long-format table.
cell_type_data = df_melted.cell_type.unique()

# Derive the number of panels from the data instead of hard-coding it, so the
# cell neither indexes past the available groups nor silently drops any.
n_subjects = len(cell_type_data)

# Grid layout: 6 columns, as many rows as needed (24 groups -> 6 x 4).
n_cols = 6
n_rows = max(1, math.ceil(n_subjects / n_cols))

# squeeze=False always returns a 2-D array of axes, so flattening is safe even
# for a single row; the figure height scales with the number of rows
# (20 x 20 inches for four rows).
fig, axes = plt.subplots(
    n_rows, n_cols,
    subplot_kw=dict(projection='polar'),
    figsize=(20, 5 * n_rows),
    squeeze=False
)

# flatten axes for easy indexing
axes = axes.flatten()

for ax, subject in zip(axes, cell_type_data):
    sccellfie.plotting.create_radial_plot(
        df_melted,
        results['task_info'],
        cell_type=str(subject),
        ax=ax,
        show_legend=False,
        ylim=1.0
    )
    ax.set_title(f"Subject {subject}", fontsize=10)

# hide any unused subplots (in case the groups do not fill the grid);
# slicing by n_subjects avoids depending on a leftover loop variable
for unused_ax in axes[n_subjects:]:
    fig.delaxes(unused_ax)


plt.tight_layout()
plt.show()
