In [None]:
import pandas as pd
import quilt3
from pathlib import Path
from tqdm.notebook import tqdm

### Load variance dataset from quilt and save locally

In [None]:
# Browse the hiPSC single-cell image dataset package in the s3://allencell registry.
pkg = quilt3.Package.browse(
    "aics/hipsc_single_cell_image_dataset",
    registry="s3://allencell",
)

In [None]:
# Load the package's metadata table and key it by CellId in a single expression.
meta_df = pkg["metadata.csv"]().set_index("CellId")

In [None]:
# Persist the metadata table locally so it can be reloaded with pd.read_csv.
meta_df.to_csv(
    "/allen/aics/animated-cell/Saurabh/cellpack-analysis/data/variance_dataset.csv"
)

In [None]:
# Reload the locally saved metadata table, restoring CellId as the index.
meta_df = pd.read_csv(
    "/allen/aics/animated-cell/Saurabh/cellpack-analysis/data/variance_dataset.csv",
    index_col="CellId",
)

In [None]:
# List the distinct structure names available in the metadata table.
print(meta_df["structure_name"].unique())

### Set structure of interest
- `SLC25A17` is peroxisomes
- `RAB5A` is early endosomes

In [None]:
# Structure name used to filter the metadata table, to look up the row of
# CellIds in the 8D-sphere table, and to name the output directory.
structure_id = "SLC25A17"

In [None]:
# Keep only the rows whose structure_name matches the structure of interest.
struct_data = meta_df.loc[meta_df["structure_name"].eq(structure_id)]

### Get cellIDs within 8D sphere in shape space (cells shaped close to average)

In [None]:
# CSV loaded below into `df_cellID`; the notebook indexes it by its "structure"
# column and parses its "CellIds" column into a list of integer cell IDs.
df_cellID_path = "/allen/aics/animated-cell/Saurabh/cellpack-analysis/data/8dsphere_ids.csv"

In [None]:
# Read the table of 8D-sphere cell IDs from the path defined above.
df_cellID = pd.read_csv(df_cellID_path)

In [None]:
# Index the table by structure name so a structure's row can be looked up with .loc.
# Guarded because "structure" stops being a column once it is the index, so
# re-running an unguarded set_index in this cell would raise KeyError.
if df_cellID.index.name != "structure":
    df_cellID = df_cellID.set_index("structure")

In [None]:
# Fetch the CellIds string for the chosen structure and split it on commas.
str_cellid = df_cellID["CellIds"].loc[structure_id].split(",")

In [None]:
# Strip any list brackets from each comma-separated token and convert it to an int.
cellid_list = [
    int(token.replace("[", "").replace("]", ""))
    for token in str_cellid
]


In [None]:
# Show the parsed cell IDs on one line, separated by spaces.
print(" ".join(str(cell_id) for cell_id in cellid_list))

### Select the cells within the 8D sphere from the dataframe

In [None]:
# Restrict the structure's rows to the CellIds parsed above, then move CellId
# from the index back into a regular column.
data = (
    struct_data
    .loc[struct_data.index.isin(cellid_list)]
    .reset_index()
)
data.shape

### Alternatively, select all cells of the structure (this overrides the 8D-sphere selection above)

In [None]:
# NOTE: this rebinds `data`, replacing the subset selected in the previous cell;
# run it only when every cell of the structure is wanted.
data = struct_data.reset_index()
# A bare expression in the middle of a cell is never displayed, so print it explicitly.
print(data.structure_name.unique())
data.shape

### Prepare file paths

In [None]:
# Output directory for the selected structure. The "sample_8d" component is fixed
# here and does not depend on which selection cell populated `data`.
save_path = Path(f"/allen/aics/animated-cell/Saurabh/cellpack-analysis/data/structure_data/{structure_id}/sample_8d/")
save_path.mkdir(parents=True, exist_ok=True)

# Sub-directory that receives the fetched image files.
raw_path = save_path / "unsegmented_imgs"
raw_path.mkdir(parents=True, exist_ok=True)

In [None]:
# Fetch the file referenced by each row's `crop_raw` entry into `raw_path`.
# itertuples() is a generator with no length, so pass the row count explicitly;
# otherwise the progress bar cannot show a percentage or an ETA.
for row in tqdm(data.itertuples(), total=len(data)):
    # `crop_raw` is a "<subdir>/<file>" key inside the package; split it once.
    subdir_name, file_name = row.crop_raw.split("/")[:2]
    local_fn = raw_path / f"{row.structure_name}_{row.CellId}_ch_{row.ChannelNumberStruct}_crop_seg_original.tiff"
    pkg[subdir_name][file_name].fetch(local_fn)
print("Done")