# Example 1: Computation of meta-analytic marker gene enrichment using mini_catactor
* Uses 5 marker gene sets by default (SF, SC, CU, TA, TN + SM=SF for neuronal subtypes)
* Input: scanpy object (row=cell, column=gene name)
* Output: scanpy object, pickled file, or csv file

In [None]:
import pickle
import scanpy as sc
import sys
from Catactor import mini_catactor
# Load the pickled scanpy object (rows = cells, columns = gene names).
scanpy_object_path = './scanpy_obj.pyn'
with open(scanpy_object_path, "rb") as handle:
    adata = pickle.load(handle)

# Prefix of the embedding columns: tsne_1 and tsne_2 are used as the
# dimension reduction space.
dimension = 'tsne_'

# Get back a scanpy object with new columns for signal enrichment and
# print its .obs table.
# NOTE(review): plot_gene=True is also passed by the last call of this cell;
# confirm that plotting is intended at this step as well.
new_adata_with_signal = mini_catactor.run_mini_catactor(
    adata,
    plot_gene=True,
    dimension=dimension,
)
print(new_adata_with_signal.obs)

# Same computation, with the result written to a pickle object.
new_adata_with_signal = mini_catactor.run_mini_catactor(
    adata,
    output_ext='pyn',
    dimension=dimension,
)

# Same computation, with the result written to a csv file.
new_adata_with_signal = mini_catactor.run_mini_catactor(
    adata,
    output_ext='csv',
    dimension=dimension,
)

# Plot the meta-analytic marker signals.
new_adata_with_signal = mini_catactor.run_mini_catactor(
    adata,
    plot=True,
    dimension=dimension,
)

# Plot the enrichment of each single marker gene.
new_adata_with_signal = mini_catactor.run_mini_catactor(
    adata,
    plot_gene=True,
    dimension=dimension,
)


In [None]:
# Load a pickled reference object. The path below is absolute and specific to
# one machine; point it at your own copy of the file before running this cell.
reference = "/home/rkawaguc/ipython/BICCN/script/Catactor/analysis/191219_meta/output/scobj/BICCN2_gene_id_order_gene__all_scanpy_obj.pyn"
with open(reference, "rb") as handle:
    ref = pickle.load(handle)
    

# Example 2: Computation of pseudo-bulk profiles for each cluster or for the top cells showing marker signal enrichment
* Input: scanpy object (row=cell, column=genomic bin) 
* Additional input: scanpy object (row=cell, column=gene name)
* Output: 4 csv files for pseudo-bulk profiles

In [None]:
from Catactor import pseudo_bulk
# Load the pickled scanpy object of genomic bins (rows = cells, columns = bins).
scanpy_bin_object_path = './scanpy_bin_obj.pyn'
with open(scanpy_bin_object_path, "rb") as f:
    pdata = pickle.load(f)

# Pseudo-bulk profiles for each cluster
pseudo_bulk.run_average_profiling(pdata, cluster=['cluster'])

# Pseudo-bulk profiles for top 500 cells in terms of the marker gene signal computed from reference.
# The reference is the gene-level scanpy object whose path is defined in
# Example 1 as `scanpy_object_path`; the previously used name
# `scanpy_obj_path` was never defined and raised a NameError.
pseudo_bulk.run_average_profiling(pdata, reference=scanpy_object_path)

# Add equally-divided genomic bin index (global_index_1000, global_index_2000, ...)
# NOTE(review): the return value is discarded, so this presumably annotates
# pdata.var in place; confirm against pseudo_bulk.genome_binning.
pseudo_bulk.genome_binning(pdata.var)

# Matrix conversion
cell_group, cell_original = 'global_index', 'global_index'
gene_group, gene_original = 'global_index_5000', 'global_index_1000'
new_pdata = pseudo_bulk.convert_row_and_column(pdata, gene_group, gene_original, cell_group, cell_original)
print(new_pdata)
