In [None]:
import cell2location
import scanpy as sc
import torch

In [None]:
# Root folder under which both model runs of this notebook store their results.
RESULTS_FOLDERNAME = '/ceph/project/tendonhca/albrecht/003-snakemake/notebooks/hamstring'
# Output locations: one for the reference regression model, one for the cell2location mapping model.
ref_run_name = f'{RESULTS_FOLDERNAME}/reference_signatures'
run_name = f'{RESULTS_FOLDERNAME}/cell2location_map'

# Input: the hamstring single-cell dataset (.h5ad) used as the reference.
hamstring_h5ad = "/ceph/project/tendonhca/albrecht/003-snakemake/data/hamstring_integrated_annotated_res0p15_20220922.h5ad"

In [None]:
# Load the hamstring single-cell dataset from the .h5ad path configured above.
adata_hamstring = sc.read(hamstring_h5ad)
# Bare last expression: show the object's summary as the cell output.
adata_hamstring

Later on, we find out that this object has no 'counts' layer.
Here, I inspect .X and assume that it holds the count matrix.
I therefore assign it to the 'counts' layer so that cell2location can run.

In [None]:
# Peek at the top-left 3x3 corner of .X as a dense array to judge whether it looks like counts.
x_corner = adata_hamstring.X[:3, :3]
x_corner.toarray()

In [None]:
# Expose .X under the 'counts' layer name that the model setup below reads from.
# Store an independent copy rather than a second reference to the same matrix:
# without .copy(), any later in-place transformation of .X would silently change
# the 'counts' layer as well.
adata_hamstring.layers["counts"] = adata_hamstring.X.copy()

In [None]:
# Number of cells per 'cell_type' label (the labels_key used for the regression model below).
cell_type_labels = adata_hamstring.obs['cell_type']
cell_type_labels.value_counts()

In [None]:
# Gene-level (.var) annotation table, shown before any gene filtering is applied.
adata_hamstring.var

In [None]:
from cell2location.utils.filtering import filter_genes

# Gene-selection cutoffs. cell_percentage_cutoff2 is intentionally not passed,
# so filter_genes falls back to its own default for that parameter.
gene_filter_kwargs = {
    "cell_count_cutoff": 30,
    "nonz_mean_cutoff": 1.12,
}
# `selected` is used in the next cell to subset the genes (columns) of adata_hamstring.
selected = filter_genes(adata_hamstring, **gene_filter_kwargs)

In [None]:
# Restrict the object to the genes chosen by filter_genes.
# Slicing yields a view; .copy() turns it into a standalone AnnData.
selected_genes_view = adata_hamstring[:, selected]
adata_hamstring = selected_genes_view.copy()
# Show the gene table again to see what is left after filtering.
adata_hamstring.var

In [None]:
# Cell-level (.obs) metadata table; inspect which columns are available as model covariates.
adata_hamstring.obs

In [None]:
# Category sizes of one .obs covariate at a time; un-comment a different line
# to inspect 'batch' or 'sample' instead of 'sex'.
#adata_hamstring.obs['batch'].value_counts()
adata_hamstring.obs['sex'].value_counts()
#adata_hamstring.obs['sample'].value_counts()

In [None]:
# List the available layers; the model setup below reads from the 'counts' layer.
adata_hamstring.layers

In [None]:
from cell2location.models import RegressionModel

# Register with the regression model which parts of the AnnData it should use:
#   layer                      - the expression layer to read
#   batch_key                  - 10X reaction / sample / batch
#   labels_key                 - cell type, the covariate used for constructing signatures
#   categorical_covariate_keys - multiplicative technical effects
#                                (platform, 3' vs 5', donor effect)
# NOTE(review): 'sample' is the batch key while 'batch' is also passed as a
# categorical covariate - confirm both columns exist in .obs and are not redundant.
RegressionModel.setup_anndata(
    adata=adata_hamstring,
    layer="counts",
    batch_key='sample',
    labels_key='cell_type',
    categorical_covariate_keys=['batch', 'sex'],
)

# Instantiate the regression model on the registered AnnData.
mod = RegressionModel(adata_hamstring)

# Sanity check: display the registered setup.
mod.view_anndata_setup()

In [None]:
%%time
# Train the regression model for at most 250 epochs, explicitly on CPU (use_gpu=False);
# the %%time cell magic reports how long the cell took.
mod.train(max_epochs=250, use_gpu=False)

In [None]:
# Plot the training history of the model.
# NOTE(review): the argument 20 presumably skips the first 20 iterations of the
# curve - confirm against the plot_history signature.
mod.plot_history(20)

In [None]:
# Export a summary of the regression model's posterior distribution into the
# AnnData object (this run is stored under 'reference_signatures').
# Posterior sampling uses the GPU only when CUDA is actually available: training
# above ran with use_gpu=False, and a hard-coded True would request a CUDA device
# that a CPU-only machine does not have.
posterior_sample_kwargs = {
    'num_samples': 5000,
    'batch_size': 2500,
    'use_gpu': torch.cuda.is_available(),
}
adata_hamstring = mod.export_posterior(
    adata_hamstring, sample_kwargs=posterior_sample_kwargs
)

# Save the trained model into the reference run folder (replacing a previous save).
mod.save(ref_run_name, overwrite=True)

# Save the AnnData object with the exported results inside the same folder,
# then display the path as the cell output.
adata_file = f"{ref_run_name}/sc.h5ad"
adata_hamstring.write(adata_file)
adata_file