In [1]:
import scanpy as sc
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
import squidpy as sq
from sklearn.metrics.pairwise import cosine_similarity
import statistics
import pandas as pd
import json
import pickle
from anndata import AnnData
import pathlib
import skimage
import seaborn as sns
import tangram as tg
from scipy.spatial import distance

  from .autonotebook import tqdm as notebook_tqdm


### Load MERFISH data

In [2]:
# Shell escape: print the kernel's current working directory.
# The relative paths used by the read/write cells in this notebook resolve against it.
!pwd

/home/apon/thesis/EM_results


In [2]:
# Read the MERFISH AnnData object from its .h5ad file.
# The path is relative to the notebook's working directory.
adata_mfish = sc.read_h5ad(
    filename="../lucas_data/real_st_spapros_merfish.h5ad"
)

In [6]:
# Inspect the "pseudo_spot" column of the cell-level obs table (one integer per cell).
# NOTE(review): presumably the id of the voxel each cell was binned into — confirm.
adata_mfish.obs["pseudo_spot"]

Cell
Cell_1       241
Cell_2       241
Cell_3       241
Cell_4       241
Cell_5        71
            ... 
Cell_5796    311
Cell_5797    507
Cell_5798     67
Cell_5799    482
Cell_5800     58
Name: pseudo_spot, Length: 5793, dtype: int32

In [9]:
# Inspect the obs table of the object stored under uns["voxelized_subdata"];
# its index is named "pseudo_spot" (one row per pseudo-spot).
adata_mfish.uns["voxelized_subdata"].obs

Unnamed: 0_level_0,X,Y,cell_counts,cell_type
pseudo_spot,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,3516.486328,4699.578613,7,4
1,3399.005127,8534.597656,12,1
2,3613.207520,1480.792358,8,6
3,1851.322754,6547.989746,12,1
4,2347.393555,2829.515625,11,1
...,...,...,...,...
574,3373.220947,2085.714600,11,1
575,3975.684814,2688.537354,9,1
576,1884.602539,8211.518555,10,7
577,3777.854248,1597.564697,10,1


In [6]:
# Shape of the "cell_type_distribution" matrix kept in obsm of the voxelized data
# (first axis follows the pseudo-spots in obs).
adata_mfish.uns["voxelized_subdata"].obsm["cell_type_distribution"].shape

(579, 20)

In [7]:
# Same "pseudo_spot" column lookup as the earlier inspection cell in this notebook.
adata_mfish.obs["pseudo_spot"]

Cell
Cell_1       241
Cell_2       241
Cell_3       241
Cell_4       241
Cell_5        71
            ... 
Cell_5796    311
Cell_5797    507
Cell_5798     67
Cell_5799    482
Cell_5800     58
Name: pseudo_spot, Length: 5793, dtype: int32

### Subset 300 pseudo-spots (fake voxels)

In [16]:
# Bind the voxelized data stored in uns to a shorter name.
# This is a reference to the same object, not a copy.
adata_sub = adata_mfish.uns["voxelized_subdata"]

In [15]:
# List the row labels (pseudo-spot identifiers) of the voxelized data.
adata_sub.obs_names

Index(['0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
       ...
       '569', '570', '571', '572', '573', '574', '575', '576', '577', '578'],
      dtype='object', name='pseudo_spot', length=579)

In [17]:
# Seed NumPy's global RNG so the voxel draw below is repeatable.
np.random.seed(22)

# Sample 300 distinct row positions out of all voxels (no replacement),
# then materialise the subset as an independent AnnData via .copy().
selected_indices = np.random.choice(
    adata_sub.n_obs,
    300,
    replace=False,
)
adata_mfish_300 = adata_sub[selected_indices, :].copy()

In [22]:
# Print the label of every sampled voxel, one per line.
for voxel_name in adata_mfish_300.obs_names:
    print(voxel_name)

555
282
492
380
28
46
158
59
123
344
311
22
339
499
194
102
137
577
108
525
420
352
2
355
578
350
120
415
360
271
500
30
199
400
204
300
335
406
233
565
548
116
357
185
542
111
242
309
365
483
384
375
215
513
575
261
391
88
172
17
487
122
203
572
507
130
284
148
362
364
163
544
41
197
221
167
101
303
556
441
338
262
268
127
334
407
1
89
173
458
398
134
146
292
154
438
371
263
571
558
56
457
5
35
308
361
306
473
219
553
503
244
488
401
228
3
67
559
202
416
31
77
40
412
363
119
336
139
216
374
522
196
319
264
177
193
543
373
73
569
178
245
452
329
333
235
115
166
530
379
58
451
13
304
402
269
448
332
297
71
239
188
240
42
446
29
164
536
254
342
39
65
474
324
224
266
66
155
388
23
515
140
149
157
489
162
312
206
549
152
296
126
26
435
47
213
411
94
454
267
243
471
504
414
506
528
121
434
567
498
253
86
85
250
189
495
174
347
106
351
327
486
345
497
90
399
394
276
285
222
386
33
169
538
421
218
430
432
320
302
79
98
114
409
450
389
43
208
255
4
57
481
428
237
32
532
82
48
325
288
437
310
3

In [18]:
# Keep the sampled row positions (indices into adata_sub) in uns of the subset.
adata_mfish_300.uns["selected_voxels"] = selected_indices

In [19]:
# Display the stored positions to check they were attached to the subset.
adata_mfish_300.uns["selected_voxels"]

array([555, 282, 492, 380,  28,  46, 158,  59, 123, 344, 311,  22, 339,
       499, 194, 102, 137, 577, 108, 525, 420, 352,   2, 355, 578, 350,
       120, 415, 360, 271, 500,  30, 199, 400, 204, 300, 335, 406, 233,
       565, 548, 116, 357, 185, 542, 111, 242, 309, 365, 483, 384, 375,
       215, 513, 575, 261, 391,  88, 172,  17, 487, 122, 203, 572, 507,
       130, 284, 148, 362, 364, 163, 544,  41, 197, 221, 167, 101, 303,
       556, 441, 338, 262, 268, 127, 334, 407,   1,  89, 173, 458, 398,
       134, 146, 292, 154, 438, 371, 263, 571, 558,  56, 457,   5,  35,
       308, 361, 306, 473, 219, 553, 503, 244, 488, 401, 228,   3,  67,
       559, 202, 416,  31,  77,  40, 412, 363, 119, 336, 139, 216, 374,
       522, 196, 319, 264, 177, 193, 543, 373,  73, 569, 178, 245, 452,
       329, 333, 235, 115, 166, 530, 379,  58, 451,  13, 304, 402, 269,
       448, 332, 297,  71, 239, 188, 240,  42, 446,  29, 164, 536, 254,
       342,  39,  65, 474, 324, 224, 266,  66, 155, 388,  23, 51

### Export `adata_mfish_300` as the spatial data to feed Tangram (Tg), and use `adata_mfish` as the single-cell data to run `candidates.py`

With the candidates, I then run the EM algorithm.

In [12]:
# Save the 300-voxel subset to an .h5ad file in the current working directory.
# NOTE(review): this cell's recorded execution count (12) is lower than the cells
# that build adata_mfish_300 (17) and set uns["selected_voxels"] (18); re-run the
# notebook top to bottom to confirm the file on disk matches the object above.
adata_mfish_300.write_h5ad("voxalized_mfish_for_EM.h5ad")