In [None]:
import scanpy as sc 
import anndata as ad
import polars as pl
import os
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Root directory of the project data. The default is the original shared
# location; set the SC_SUPERVISED_PROJECT_DIR environment variable to run the
# notebook against a different checkout without editing this cell.
PROJECT_DIR = os.environ.get(
    "SC_SUPERVISED_PROJECT_DIR",
    "/home/jovyan/share/data/analyses/benjamin/Single_cell_supervised",
)

In [None]:
# Read the single-cell profile table (parquet) from the project directory with polars.
profiles_relpath = 'BF_MOA/DeepProfiler/datasets/specs5k_undersampled_significant_BF.parquet'
profiles_path = os.path.join(PROJECT_DIR, profiles_relpath)
sc_profiles = pl.read_parquet(profiles_path)

In [None]:
# Partition the columns in one pass: any name containing "Feature" is treated
# as a feature column, everything else as metadata. Column order is preserved.
features_fixed, meta_features = [], []
for column in sc_profiles.columns:
    target = features_fixed if "Feature" in column else meta_features
    target.append(column)

In [None]:
# Show the columns classified as metadata (every column whose name does not contain "Feature").
meta_features

In [None]:
# Convert the polars frame to pandas once (the original converted the full
# table twice), then build the AnnData object: feature columns become X and
# the remaining columns become the per-observation metadata (obs).
profiles_pd = sc_profiles.to_pandas()
adata = ad.AnnData(X = profiles_pd[features_fixed], obs = profiles_pd[meta_features])
# The column selections above are separate frames, so the full pandas copy is
# no longer needed; drop it to free kernel memory.
del profiles_pd

In [None]:
def run_scanpy(adata, *, groups="Metadata_cmpdName", n_neighbors=10, n_pcs=50,
               resolution=0.2, cluster_key="clusters"):
    """Run the PCA -> neighbours -> PAGA -> UMAP -> Leiden pipeline on ``adata``.

    The scanpy calls below are invoked without ``copy=True`` and nothing is
    returned, so all results are expected to be stored on ``adata`` itself.

    Parameters
    ----------
    adata
        AnnData object to embed and cluster.
    groups
        Name of the ``adata.obs`` column passed to ``sc.tl.paga`` as the
        grouping. Defaults to ``"Metadata_cmpdName"``.
    n_neighbors
        Neighbourhood size passed to ``sc.pp.neighbors``.
    n_pcs
        Number of principal components passed to ``sc.pp.neighbors``. PCA is
        run with scanpy's default number of components, so this should not
        exceed what ``sc.tl.pca`` produced.
    resolution
        Resolution passed to ``sc.tl.leiden``.
    cluster_key
        ``adata.obs`` key under which the Leiden labels are stored.

    Returns
    -------
    None
    """
    sc.tl.pca(adata, svd_solver='arpack')
    sc.pp.neighbors(adata, n_neighbors=n_neighbors, n_pcs=n_pcs)
    sc.tl.paga(adata, groups=groups)
    # Called with plot=False so no figure is drawn; per the scanpy docs this
    # step provides the PAGA layout that `init_pos='paga'` relies on below.
    # Remove `plot=False` to see the coarse-grained graph.
    sc.pl.paga(adata, plot=False)
    sc.tl.umap(adata, init_pos='paga')
    sc.tl.leiden(adata, key_added=cluster_key, resolution=resolution)

In [None]:
# Run PCA, neighbour graph, PAGA, UMAP and Leiden clustering on `adata`
# (the function returns nothing; results are stored on the object).
run_scanpy(adata)

In [None]:
# Draw the PCA and UMAP embeddings, both coloured by the "project" column
# with the same palette.
embedding_plot_kwargs = dict(color="project", palette="Set2")
sc.pl.pca(adata, **embedding_plot_kwargs)
# To colour the UMAP by compound instead, use color="Metadata_cmpdName".
sc.pl.umap(adata, **embedding_plot_kwargs)

In [None]:
# Persist the embedded AnnData object. The target is a relative path, so make
# sure its directory exists first; on a fresh checkout `moa/` may be missing.
EMBEDDING_H5AD = "moa/sc_embedding_BF_sign_DP.h5ad"
os.makedirs(os.path.dirname(EMBEDDING_H5AD), exist_ok=True)
adata.write(EMBEDDING_H5AD)

In [None]:
# Split the MoA labels found in `adata.obs['moa']` into two groups so each
# density figure shows only half of the classes. The control label is left out
# of the split and then added to both groups.
CONTROL_MOA = 'dmso'

# Unique labels in order of first appearance (not alphabetical). Filtering,
# rather than `list.remove`, avoids a ValueError when the control label is
# absent, and null labels are not counted as a class.
categories = [
    moa for moa in adata.obs['moa'].unique().tolist()
    if moa != CONTROL_MOA and not pd.isna(moa)
]

# First half / second half; with an odd count group2 gets the extra class.
half = len(categories) // 2
group1 = categories[:half] + [CONTROL_MOA]
group2 = categories[half:] + [CONTROL_MOA]

In [None]:
def generate_density_plots(adata, basis, group_categories, plot_key_prefix, groupby_col='moa'):
    """Compute and plot embedding densities for a subset of label categories.

    A temporary ``adata.obs['temp_group']`` column is created that keeps the
    label for observations whose ``groupby_col`` value is in
    ``group_categories`` and is null for all others. Densities are computed
    and plotted per remaining category, and the temporary column is removed
    again, including when the scanpy calls raise.

    Parameters
    ----------
    adata
        AnnData object whose ``obs`` contains ``groupby_col``.
    basis
        Embedding name passed to scanpy (e.g. ``'umap'``).
    group_categories
        Iterable of label values to keep as density groups.
    plot_key_prefix
        Tag inserted into the saved figure name.
    groupby_col
        ``adata.obs`` column holding the labels. Defaults to ``'moa'``.

    Returns
    -------
    None
    """
    temp_group_col = 'temp_group'

    labels = adata.obs[groupby_col]
    keep = labels.isin(list(group_categories))
    # scanpy's embedding_density requires a categorical `groupby` column, so
    # build one explicitly. Going through object dtype first makes sure the
    # categories are only the labels actually kept (no empty categories).
    adata.obs[temp_group_col] = labels.astype(object).where(keep).astype('category')

    try:
        sc.tl.embedding_density(adata, basis=basis, groupby=temp_group_col)
        # NOTE(review): scanpy documents that a string `save` is appended to
        # its default figure filename inside `sc.settings.figdir`; with the
        # "moa/" prefix the file may not land in ./moa — confirm.
        sc.pl.embedding_density(adata, basis=basis, key=f'{basis}_density_{temp_group_col}',
                                save=f"moa/sc_BF_sign_{plot_key_prefix}_density_{basis}.png")
    finally:
        # Always drop the temporary column so a failed plot does not leave
        # stray state on `adata.obs`.
        del adata.obs[temp_group_col]

# Density plot for the first half of the MoA classes (plus the control).
# The original call plotted group2 here and again in the next cell, so group1
# was never drawn.
generate_density_plots(adata, 'umap', group1, 'group1')

In [None]:
# Density plot on the UMAP embedding for the `group2` label set.
generate_density_plots(
    adata,
    basis='umap',
    group_categories=group2,
    plot_key_prefix='group2',
)

In [None]:
# Density of every MoA class on the UMAP embedding.
# The original cell referenced `basis` and `temp_group_col`, which exist only
# as locals inside `generate_density_plots`, so it raised NameError on a fresh
# kernel; it also plotted a key whose grouping column that function deletes.
# This version is self-contained and groups by the `moa` column directly.
density_basis = "umap"
density_groupby = "moa"
# embedding_density needs a categorical grouping column; the cast is a no-op
# if the column is already categorical.
adata.obs[density_groupby] = adata.obs[density_groupby].astype("category")
sc.tl.embedding_density(adata, basis=density_basis, groupby=density_groupby)
sc.pl.embedding_density(adata, basis=density_basis, key=f"{density_basis}_density_{density_groupby}")