# This tutorial shows how to run Cytocraft on subcellular-resolution spatial transcriptomics (ST) data

## Load packages

In [10]:
# All imports live in this first cell so the notebook runs from a fresh kernel:
# later cells use `os`, `np` and `pd`.
import os

import numpy as np
import pandas as pd

from src.cytocraft import craft

# Input files of the MERFISH ileum demo: the transcript-level gem table and
# the per-cell metadata table.
gem_path = './demo/merfish_ileum/transcripts.gem.csv'
obs_path = './demo/merfish_ileum/cell_feature.csv'

## Read the subcellular-resolution ST gene expression matrix (gem) file and the corresponding cell metadata file

In [11]:
# Load the comma-separated gem table found at gem_path.
gem = craft.read_gem_as_csv(gem_path, sep=',')

In [3]:
# Display the loaded gem table (one row per CellID / x / y / geneID / MIDCount record).
gem

Unnamed: 0,CellID,x,y,geneID,MIDCount
0,1,1705,1271,Maoa,1.0
1,2,1725,1922,Maoa,1.0
2,2,1753,1863,Maoa,1.0
3,2,1760,1865,Maoa,1.0
4,3,1904,794,Maoa,1.0
...,...,...,...,...,...
819660,1365,5704,38,Hrh1,1.0
819661,1365,5685,43,Htr4,1.0
819662,1365,5631,61,Taar6,1.0
819663,1365,5720,62,Taar7a,1.0


In [4]:
# Read the per-cell metadata table. The file is parsed as tab-separated even
# though its name ends in .csv.
obs = pd.read_csv(obs_path, sep='\t')

In [5]:
# Display the cell metadata table (one row per cell, including its cell_type).
obs

Unnamed: 0,cell_id,center_x,center_y,girth,area,roundness,mRNA_count,cell_type
0,1,1740.994710,1264.435066,457.891583,11374.500122,0.681737,157,Myocyte
1,2,1737.780300,1862.797246,434.107057,11497.500016,0.766690,361,Neuron
2,3,1956.299821,844.821529,757.996183,14084.500173,0.308047,173,Myocyte
3,4,1906.847734,1438.033371,425.555147,8350.499917,0.579443,97,Myocyte
4,5,1846.247869,1025.504362,258.979455,1681.000022,0.314954,48,Goblet cell
...,...,...,...,...,...,...,...,...
5240,5241,4775.738349,9203.993896,364.105471,2020.500012,0.191520,82,Vascular
5241,5242,4707.330224,9169.062368,276.889768,1822.500001,0.298720,67,Goblet cell
5242,5243,4730.311728,9174.575618,266.682651,1079.999977,0.190829,49,Goblet cell
5243,5244,4923.399911,9020.771416,93.383967,375.500003,0.541097,20,Goblet cell


## Split the gem file by the cell types listed in the cell metadata file

In [6]:
# List the metadata columns so the two key names below can be checked against them.
print(obs.columns)
# keys are taken from the obs column index
ct_key = 'cell_type'  # obs column holding the cell-type label
ci_key = 'cell_id'  # obs column holding the cell identifier
# Split the gem file by cell type; the log printed by this call lists one
# output path per cell type. gsep=',' matches the separator used to read gem_path.
split_paths = craft.split_gem(gem_path, obs, ct_key, ci_key, gsep=',')

Index(['cell_id', 'center_x', 'center_y', 'girth', 'area', 'roundness',
       'mRNA_count', 'cell_type'],
      dtype='object')
split gem path of Myocyte : ./demo/merfish_ileum/transcripts.gem.csv.cell_type.Myocyte.tsv
split gem path of Neuron : ./demo/merfish_ileum/transcripts.gem.csv.cell_type.Neuron.tsv
split gem path of Goblet cell : ./demo/merfish_ileum/transcripts.gem.csv.cell_type.Goblet_cell.tsv
split gem path of TA : ./demo/merfish_ileum/transcripts.gem.csv.cell_type.TA.tsv
split gem path of Stem cell : ./demo/merfish_ileum/transcripts.gem.csv.cell_type.Stem_cell.tsv
split gem path of Paneth cell : ./demo/merfish_ileum/transcripts.gem.csv.cell_type.Paneth_cell.tsv
split gem path of Macrophage : ./demo/merfish_ileum/transcripts.gem.csv.cell_type.Macrophage.tsv
split gem path of B cell : ./demo/merfish_ileum/transcripts.gem.csv.cell_type.B_cell.tsv
split gem path of Enterocyte cell : ./demo/merfish_ileum/transcripts.gem.csv.cell_type.Enterocyte_cell.tsv
split gem path of Tuft c

## Run Cytocraft

In [7]:
# Per-cell-type gem file to reconstruct (the Myocyte path reported by the split step).
ct_gem_path = './demo/merfish_ileum/transcripts.gem.csv.cell_type.Myocyte.tsv'
# Sample name: the file name with its final extension (.tsv) removed.
SN = os.path.basename(os.path.splitext(ct_gem_path)[0])
# Load the same tab-separated file twice: once as a table and once as an AnnData object.
# NOTE: this rebinds `gem`, replacing the full transcript table loaded earlier.
gem = craft.read_gem_as_csv(ct_gem_path, sep='\t')
adata = craft.read_gem_as_adata(ct_gem_path,sep='\t')

Adding adata in adata.obs['orig.ident'].
Adding data.position as adata.obsm['spatial'] .
Adding data.position as adata.obs['x'] and adata.obs['y'] .
Finished conversion to anndata.


In [9]:
# Run Cytocraft on the Myocyte subset; the returned object replaces `adata`.
adata = craft.craft(
    gem=gem,
    adata=adata,
    species='Mice',
    nderive=10,  # logged as the number of genes used for rotation derivation
    thresh=0.9,  # logged as the threshold for the gene filter
    thresh_rmsd=0.25,  # presumably the cutoff on the per-loop X_new/X_old distance -- confirm
    seed=999,
    samplename=SN,
)

Speceis: Mice
Sample Name: transcripts.gem.csv.cell_type.Myocyte
Seed: 999
Cell Number: 570
Gene Number: 233
Threshold for gene filter is: 0.9
Number of genes used for Rotation Derivation is: 10
Task ID: 2C5U

Distance between X_new and X_old for loop 1 is: 0.9944597874366184
Distance between X_new and X_old for loop 2 is: 0.46834165701373814
Distance between X_new and X_old for loop 3 is: 0.2942829851542298
Distance between X_new and X_old for loop 4 is: 0.25051306290204695
Distance between X_new and X_old for loop 5 is: 0.20772909510718088
Number of total Transcription centers is: 66


## Draw plots

In [14]:
# Lookup returned by craft.get_gene_chr for mouse
# (presumably gene -> chromosome; confirm against the cytocraft API).
genechr_mice = craft.get_gene_chr(species='Mice')
# Array stored under the 'X' key of adata.uns (presumably written by craft.craft -- confirm).
X = adata.uns['X']
# Distance matrix returned by craft.gene_gene_distance_matrix for X.
D = craft.gene_gene_distance_matrix(X)

66it [00:00, 217.62it/s]


In [None]:
# labels
# Label every gene with its entry in the mouse lookup built in the previous
# cell (`genechr_mice`); genes missing from the lookup are labelled 'Unknown'.
# NOTE(review): `GeneLists` is not defined in the visible cells, and the key
# 'Lung5_Rep3 macrophage' names a different sample than the merfish_ileum
# Myocyte data processed above -- confirm which gene list belongs here.
labels=[]
for g in GeneLists['Lung5_Rep3 macrophage']:
    try:
        labels.append(genechr_mice[g])
    except KeyError:
        labels.append('Unknown')
# cmap
# Map each distinct label to a colour by pairing the sorted unique labels with
# the palette entries in order.
# NOTE(review): `zeileis_28` is not defined in the visible cells -- confirm its source.
cmap=dict(zip(np.unique(labels), zeileis_28))

In [None]:
# Plot the network for distance matrix D, passing the per-gene labels and the
# label -> colour map built above.
# NOTE(review): `plot_network` is called unqualified but is not defined or
# imported in the visible cells -- confirm where it comes from.
# NOTE(review): the html file name refers to 'Lung5_Rep3_macrophage', not the
# Myocyte sample processed in this notebook -- confirm the intended name.
plot_network(D, labels=labels, cmap=cmap, 
            csep='\n', corder=0, N_neighbor=1, html='GeneConf_Lung5_Rep3_macrophage.html', 
            width=1400, height=1000,edge_scale=0.1, edge_adjust=+1)