# 0. Import libraries

In [2]:
import pandas as pd
import os
import warnings
# NOTE(review): this silences *every* warning for the rest of the session,
# including deprecation and numerical warnings from pandas and the libraries
# used below. Consider narrowing the filter to specific categories/modules.
warnings.filterwarnings("ignore")

# strInt is the object constructed in step 5; pp provides load_lr_df and
# prep_all_adata, which are used in steps 1 and 4.
from pyStrint.strint import strInt
from pyStrint import preprocess as pp

# 1. Load files

In [3]:
# All inputs are read from inputDir; every result is written below outDir.
# outDir keeps its trailing slash because later cells build paths as
# f'{outDir}<file name>'.
inputDir = './'
outDir = f'{inputDir}/results/'
# exist_ok=True replaces the check-then-create pattern: the cell is safe to
# re-run and cannot fail if the directory appears between check and creation.
os.makedirs(outDir, exist_ok=True)

# Every input table is tab-separated, with a header row and the first column
# used as the row index.
tsv_opts = dict(sep = '\t', header = 0, index_col = 0)
sc_exp = pd.read_csv(f'{inputDir}/SC_exp.tsv', **tsv_opts)
sc_meta = pd.read_csv(f'{inputDir}/SC_meta.tsv', **tsv_opts)
st_exp = pd.read_csv(f'{inputDir}/ST_exp.tsv', **tsv_opts)
st_coord = pd.read_csv(f'{inputDir}/ST_coord.tsv', **tsv_opts)
# st_decon is later passed to strInt as `weight`.
st_decon = pd.read_csv(f'{inputDir}/ST_decon.tsv', **tsv_opts)

In [4]:
# Append the additional ligand-receptor pairs (lr_df.csv) to the built-in
# ligand-receptor table returned by pp.load_lr_df.
species  = 'Human'
lr_list = pd.read_csv(f'{inputDir}/lr_df.csv',sep=',',header=0,index_col=0)
lr_df = pp.load_lr_df(species = species)
# Rename the extra table's columns to match the built-in table so the rows
# stack into the same columns. This is positional: it raises if the two tables
# do not have the same number of columns.
lr_list.columns = lr_df.columns
# ignore_index=True renumbers the combined rows 0..n-1 in one step.
lr_df = pd.concat((lr_df,lr_list),axis = 0,ignore_index = True)

# 2. Parameters

In [5]:
# Spatial data type label.
st_tp = 'st'

# Hyperparameters handed positionally to gradient_descent in step 6.
alpha = 2
beta = 0.001
gamma = 0.001
delta = 0.1
eta = 0.0005

# Passed to select_cells in step 5 (as mean_num_per_spot and repeat_penalty).
num_per_spot = 6
repeat_penalty = 10

# Number of repetitions for gradient descent; choose according to your data.
max_rep = 10

# 3. Get cell model

In [6]:
import smurf

# Build the SMURF operator with 15 features in estimate-only mode.
operator = smurf.SMURF(n_features=15, estimate_only=True)
# sc_exp is transposed on the way in and the result is transposed back, so
# sc_distribution is laid out like sc_exp (assuming smurf_impute keeps the
# orientation of its input — TODO confirm).
imputed_transposed = operator.smurf_impute(sc_exp.T)
sc_distribution = imputed_transposed.T

Running SCEnd on 2131 cells and 3247 genes
normalizing data by library size...
preprocessing data...
number of iteration:  1 / 10
number of iteration:  2 / 10
number of iteration:  3 / 10
number of iteration:  4 / 10
number of iteration:  5 / 10
number of iteration:  6 / 10
number of iteration:  7 / 10
number of iteration:  8 / 10
number of iteration:  9 / 10
number of iteration:  10 / 10


In [7]:
# Persist the imputed distribution under outDir so it can be reloaded later
# instead of recomputing it.
sc_distribution.to_csv(
    f'{outDir}SC_smurf.tsv',
    sep = '\t',
    header = True,
    index = True,
)

In [None]:
# Alternative to recomputing: load the previously saved result from outDir.
sc_distribution = pd.read_csv(
    f'{outDir}SC_smurf.tsv',
    sep = '\t',
    header = 0,
    index_col = 0,
)

# 4. Preprocess

In [8]:
# Run the joint preprocessing on the raw tables. Note that lr_df is rebound to
# the value returned by prep_all_adata, so re-running this cell feeds the
# already-processed table back in.
sc_adata, st_adata, sc_ref, lr_df = pp.prep_all_adata(
    sc_exp = sc_exp,
    st_exp = st_exp,
    sc_distribution = sc_distribution,
    sc_meta = sc_meta,
    st_coord = st_coord,
    lr_df = lr_df,
    SP = species,
)

Data clean is done! Using 3244 shared genes .


# 5. Cell selection

In [9]:
# Build the strInt object from the preprocessed data; its outputs go to outDir.
obj_spex = strInt(
    save_path = outDir,
    st_adata = st_adata,
    weight = st_decon,
    sc_ref = sc_ref,
    sc_adata = sc_adata,
    cell_type_key = 'celltype',
    lr_df = lr_df,
    # Use the value from the parameters cell rather than a duplicated literal,
    # so changing st_tp there actually takes effect here.
    st_tp = st_tp,
    species = species,
)
obj_spex.prep()

# Select cells for each spot; returns the metadata of the selected cells.
sc_agg_meta = obj_spex.select_cells(
    p = 0,
    mean_num_per_spot = num_per_spot,
    mode = 'strict',
    # NOTE(review): literal 1 here, not the notebook-level max_rep — confirm
    # this is intentional.
    max_rep = 1,
    repeat_penalty = repeat_penalty,
)

The cell index of sc_ref is not str, changed to str for consistency.
sc_ref and sc_adata has different genes, both data are subset to 3244 genes.
Parameters checked!
Getting svg genes
By setting k as 6, each spot has average 5.80 neighbors.
	 Estimating the cell number in each spot by the deconvolution result.
	 SpexMod selects 3070 feature genes.
	 Init solution: max - 0.9984,     mean - 0.9801,     min - 0.8953
	 Swap selection start...
	 Swapped solution: max - 1.00,     mean - 0.99,     min - 0.90


# 6. Refinement process

In [10]:
# Optional: uncomment to save the cell-selection metadata before refinement.
# sc_agg_meta.to_csv(f'{outDir}/spexmod_sc_meta.tsv',sep = '\t',header=True,index=True)

# Refine the selected cells by gradient descent. sc_agg_meta is rebound to the
# refined metadata, replacing the result of the cell-selection step.
alter_sc_exp, sc_agg_meta = obj_spex.gradient_descent(
    alpha, beta, gamma, delta, eta,
    init_sc_embed = False,
    # Driven by max_rep from the parameters cell (documented there as the
    # gradient-descent repetition count) instead of a separate hard-coded 10.
    iteration = max_rep,
    k = 2, W_HVG = 2,
    left_range = 7, right_range = 8, steps = 1, dim = 2,
)

Running v12 now...
Init sc_coord by affinity embedding...
Avearge shape correlation is: 0.8528175122607736
First-term calculation done!
Second-term calculation done!
Third term calculation done!
Fourth term calculation done!
Hyperparameters adjusted.
-----Start iteration 0 -----
Avearge shape correlation is: 0.8528175122607736
First-term calculation done!
Second-term calculation done!
Third term calculation done!
Fourth term calculation done!
---0 self.loss4 881.4142166262536 self.GAMMA 3.6121919476265583e-06 self.GAMMA*self.loss4 0.003183837335820924
---In iteration 0, the loss is:loss1:3.18384,loss2:17.83044,loss3:4966.61088,loss4:881.41422,loss5:216.35201.
---In iteration 0, the loss is:loss1:3.18384,loss2:6.36767,loss3:0.00318,loss4:0.00318,loss5:0.31838.
The total loss after iteration 0 is 9.87626.
-----Start iteration 1 -----
Avearge shape correlation is: 0.8803128888545424
First-term calculation done!
Second-term calculation done!
Third term calculation done!
Fourth term calcula