In [1]:
import pandas as pd
import numpy as np
import quilt3
from pathlib import Path
from aicsimageio.aics_image import AICSImage
from aicsimageio.writers.ome_tiff_writer import OmeTiffWriter



In [2]:
from tqdm import tqdm

Load variance dataset

In [42]:
# Open the remote quilt package by reading its manifest from the s3://allencell registry.
pkg = quilt3.Package.browse("aics/hipsc_single_cell_image_dataset", registry="s3://allencell")

Downloading manifest: 100%|██████████| 574M/574M [00:07<00:00, 77.6MB/s]  
Loading manifest: 100%|██████████| 484465/484465 [00:36<00:00, 13.2k/s]


In [None]:
# Materialize the package's metadata table and key its rows by CellId.
meta_df = pkg["metadata.csv"]().set_index("CellId")

In [None]:
# Cache the metadata table (index included) as a CSV; a later cell reloads it from this same path.
meta_df.to_csv("/allen/aics/animated-cell/Saurabh/cellpack-analysis/data/variance_dataset.csv")

In [3]:
# Reload the cached metadata table, indexed by CellId.
# low_memory=False makes pandas infer each column's dtype from the whole file
# instead of chunk by chunk, which avoids mixed-type columns (and the
# DtypeWarning pandas raises for them) on this very wide table.
meta_df = pd.read_csv(
    "/allen/aics/animated-cell/Saurabh/cellpack-analysis/data/variance_dataset.csv",
    index_col="CellId",
    low_memory=False,
)

  meta_df = pd.read_csv("/allen/aics/animated-cell/Saurabh/cellpack-analysis/data/variance_dataset.csv", index_col="CellId")


In [4]:
# List the distinct structure names present in the metadata.
print(meta_df.structure_name.unique())

['TOMM20' 'ACTB' 'CETN2' 'TUBA1B' 'LMNB1' 'DSP' 'SEC61B' 'ST6GAL1' 'SON'
 'GJA1' 'AAVS1' 'MYH10' 'TJP1' 'ACTN1' 'LAMP1' 'FBL' 'HIST1H2BJ' 'PXN'
 'NPM1' 'NUP153' 'ATP2A2' 'CTNNB1' 'RAB5A' 'SLC25A17' 'SMC1A']


In [5]:
# Structure to process; it is matched against the `structure_name` column of the metadata.
structure_id = "SLC25A17"

In [6]:
# Keep only the metadata rows that belong to the chosen structure.
is_selected_structure = meta_df["structure_name"] == structure_id
struct_data = meta_df.loc[is_selected_structure]

In [7]:
# Show the index (CellIds) of the rows selected for this structure.
struct_data.index

Index([743895, 743896, 743897, 743900, 743901, 743905, 743906, 743907, 743908,
       743912,
       ...
       839761, 839762, 839763, 839765, 839766, 839767, 839768, 839778, 839779,
       839782],
      dtype='int64', name='CellId', length=1997)

Get cellIDs in 8d sphere

In [8]:
# CSV holding, per structure, the CellIds that fall in the 8d sphere.
df_cellID_path = "/allen/aics/animated-cell/Saurabh/cellpack-analysis/data/8dsphere_ids.csv"

In [9]:
# Load the per-structure CellId table.
df_cellID = pd.read_csv(df_cellID_path)

In [10]:
# Index the table by structure name so a structure can be looked up with .loc.
df_cellID = df_cellID.set_index("structure")

In [11]:
# Look up this structure's "CellIds" entry and split it on commas.
# The pieces are still strings at this point; they are converted to int in the next cell.
str_cellid = df_cellID.loc[structure_id, "CellIds"].split(",")

In [12]:
# Drop the list brackets left over from the serialized string and convert
# every piece to an integer CellId.
cellid_list = [
    int(token.replace("[", "").replace("]", ""))
    for token in str_cellid
]


In [19]:
# Print the parsed CellIds on one line, separated by spaces.
print(*cellid_list)

742470 742728 742740 742983 742998 743000 743004 743229 743467 743468 743471 743670 743695 744866 745069 745071 745072 745289 745303 768012 768285 768287 768830 768844 769069 769071 769076 769078 769673 769683 769684 769686 771334 771619 771623 771863 773253 773255 773256 773265 774868 775995 775997 775999 776009 779004 779005 779006 779613 780460 780462 780463 780465 780470 780970 780972 780978 780980 780987 781554 781562 782079 782083 782104 782105 782351 782355 782357 782369 782372 782653 783699 783925 783931 783937 809731 809742 809939 809946 809954 810410 810412 810668 810669 810678 810683 810889 810901 811098 811099 811105 811314 811323 811559 811855 812909 812913 812928 813149 813161 813400 813626 813897 813906 813923 814168 814680 815659 815661 815887 815888 815897 815912 816167 816168 816390 816394 816400 816401 816947 816955 816956 816962 816963 817201 817727 817729 817732 817945 817946 818225 818441 818449 818684 819144 819147 819167 819168 819170 819176 819178 819365 819370

Select cellIDs in 8d sphere

In [13]:
# Restrict the structure's rows to the CellIds listed for the 8d sphere,
# then move CellId from the index back into a regular column.
in_sphere = struct_data.index.isin(cellid_list)
data = struct_data.loc[in_sphere].reset_index()
data.shape

(305, 1213)

In [34]:
# Inspect the column names available on the selected rows.
data.columns

Index(['CellId', 'roi', 'crop_raw', 'crop_seg', 'name_dict', 'fov_path',
       'fov_seg_path', 'struct_seg_path', 'structure_name',
       'this_cell_nbr_complete',
       ...
       'MEM_shcoeffs_L16M16S_lcc', 'meta_fov_image_date', 'NUC_MEM_PC1',
       'NUC_MEM_PC2', 'NUC_MEM_PC3', 'NUC_MEM_PC4', 'NUC_MEM_PC5',
       'NUC_MEM_PC6', 'NUC_MEM_PC7', 'NUC_MEM_PC8'],
      dtype='object', length=1213)

Select all the cellIDs

In [14]:
# Use every cell of the chosen structure, with CellId as a regular column.
# NOTE(review): this reassigns `data`, replacing the 8d-sphere subset built in
# the earlier cell, while the output folder below is still named "sample_8d" —
# confirm which selection is meant to be downloaded.
data = struct_data.reset_index()
data.shape

(1997, 1213)

Prepare file paths

In [15]:
# Output locations: a per-structure folder and, inside it, a folder for the
# downloaded images. Both are created if missing.
save_path = Path(f"/allen/aics/animated-cell/Saurabh/cellpack-analysis/data/structure_data/{structure_id}/sample_8d/")
raw_path = save_path / "unsegmented_imgs"
for directory in (save_path, raw_path):
    directory.mkdir(parents=True, exist_ok=True)

In [44]:
# Download the `crop_raw` image of every row in `data` into `raw_path`.
# `itertuples()` is a generator with no length, so `total` is passed explicitly
# to let tqdm show a percentage and ETA instead of a bare iteration counter.
for row in tqdm(data.itertuples(), total=len(data)):
    # The first two "/"-separated parts of `crop_raw` are used as nested package keys.
    subdir_name, file_name = row.crop_raw.split("/")[:2]
    # NOTE(review): the local file name says "crop_seg" although the entry
    # fetched is `crop_raw` — confirm the naming is intentional before changing it.
    local_fn = raw_path / f"{row.structure_name}_{row.CellId}_ch_{row.ChannelNumberStruct}_crop_seg_original.tiff"
    pkg[subdir_name][file_name].fetch(local_fn)
print("Done")

100%|██████████| 267k/267k [00:02<00:00, 109kB/s]
100%|██████████| 324k/324k [00:02<00:00, 141kB/s]
100%|██████████| 295k/295k [00:02<00:00, 147kB/s]
100%|██████████| 328k/328k [00:01<00:00, 169kB/s]
100%|██████████| 262k/262k [00:02<00:00, 104kB/s]
100%|██████████| 324k/324k [00:02<00:00, 159kB/s]
100%|██████████| 318k/318k [00:03<00:00, 92.7kB/s]
100%|██████████| 329k/329k [00:01<00:00, 175kB/s]
100%|██████████| 284k/284k [00:02<00:00, 126kB/s]
100%|██████████| 261k/261k [00:02<00:00, 127kB/s]
100%|██████████| 289k/289k [00:02<00:00, 129kB/s]
100%|██████████| 293k/293k [00:01<00:00, 150kB/s]
100%|██████████| 289k/289k [00:02<00:00, 107kB/s] 
100%|██████████| 292k/292k [00:02<00:00, 133kB/s]
100%|██████████| 269k/269k [00:02<00:00, 104kB/s]
100%|██████████| 272k/272k [00:02<00:00, 117kB/s]
100%|██████████| 305k/305k [00:02<00:00, 117kB/s]
100%|██████████| 291k/291k [00:02<00:00, 138kB/s]
100%|██████████| 269k/269k [00:02<00:00, 132kB/s]
100%|██████████| 294k/294k [00:02<00:00, 113kB/s