In [18]:
import numpy as np
import pandas as pd
import polars as pl
import os
import gc
import json
import random
from tqdm import tqdm
import matplotlib.pyplot as plt
import matplotlib
from mpl_toolkits.mplot3d import Axes3D
import seaborn as sns
import zarr
import napari

gc.enable()

pd.options.display.max_columns = None
#pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_colwidth', None)

#pl.Config.set_tbl_rows(-1)
pl.Config.set_tbl_cols(-1)
pl.Config.set_fmt_str_lengths(10000)

polars.config.Config

In [2]:
path = 'I:/Kaggle/czii-cryo-et-object-identification/'

In [3]:
train_data_experiment_folders_path = path + 'train/static/ExperimentRuns/'
train_data_experiment_folders_path

'I:/Kaggle/czii-cryo-et-object-identification/train/static/ExperimentRuns/'

In [4]:
test_data_experiment_folders_path = path + 'test/static/ExperimentRuns/'
test_data_experiment_folders_path

'I:/Kaggle/czii-cryo-et-object-identification/test/static/ExperimentRuns/'

In [5]:
train_data_experiments = os.listdir(train_data_experiment_folders_path)
train_data_experiments

['TS_5_4', 'TS_69_2', 'TS_6_4', 'TS_6_6', 'TS_73_6', 'TS_86_3', 'TS_99_9']

In [6]:
test_data_experiments = os.listdir(test_data_experiment_folders_path)
test_data_experiments

['TS_5_4', 'TS_69_2', 'TS_6_4']

In [7]:
data_dict = {}
for experiment in tqdm(train_data_experiments):
    image_types_dict = {}    
    image_types_dict['denoised'] = zarr.open(train_data_experiment_folders_path + f'{experiment}/VoxelSpacing10.000/denoised.zarr', mode='r')
    image_types_dict['iso'] = zarr.open(train_data_experiment_folders_path + f'{experiment}/VoxelSpacing10.000/isonetcorrected.zarr', mode='r')
    image_types_dict['dcon'] = zarr.open(train_data_experiment_folders_path + f'{experiment}/VoxelSpacing10.000/ctfdeconvolved.zarr', mode='r')
    image_types_dict['wbp'] = zarr.open(train_data_experiment_folders_path + f'{experiment}/VoxelSpacing10.000/wbp.zarr', mode='r')
    data_dict[experiment] = image_types_dict

100%|███████████████████████████████████████████████████████████████████████████████████| 7/7 [00:00<00:00, 100.81it/s]


In [8]:
data_dict

{'TS_5_4': {'denoised': <Group file://I:/Kaggle/czii-cryo-et-object-identification/train/static/ExperimentRuns/TS_5_4/VoxelSpacing10.000/denoised.zarr>,
  'iso': <Group file://I:/Kaggle/czii-cryo-et-object-identification/train/static/ExperimentRuns/TS_5_4/VoxelSpacing10.000/isonetcorrected.zarr>,
  'dcon': <Group file://I:/Kaggle/czii-cryo-et-object-identification/train/static/ExperimentRuns/TS_5_4/VoxelSpacing10.000/ctfdeconvolved.zarr>,
  'wbp': <Group file://I:/Kaggle/czii-cryo-et-object-identification/train/static/ExperimentRuns/TS_5_4/VoxelSpacing10.000/wbp.zarr>},
 'TS_69_2': {'denoised': <Group file://I:/Kaggle/czii-cryo-et-object-identification/train/static/ExperimentRuns/TS_69_2/VoxelSpacing10.000/denoised.zarr>,
  'iso': <Group file://I:/Kaggle/czii-cryo-et-object-identification/train/static/ExperimentRuns/TS_69_2/VoxelSpacing10.000/isonetcorrected.zarr>,
  'dcon': <Group file://I:/Kaggle/czii-cryo-et-object-identification/train/static/ExperimentRuns/TS_69_2/VoxelSpacing10.00

In [9]:
test_data_dict = {}
for experiment in tqdm(test_data_experiments):
    image_types_dict = {}    
    image_types_dict['denoised'] = zarr.open(test_data_experiment_folders_path + f'{experiment}/VoxelSpacing10.000/denoised.zarr', mode='r')
    test_data_dict[experiment] = image_types_dict

100%|███████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 592.95it/s]


In [10]:
test_data_dict

{'TS_5_4': {'denoised': <Group file://I:/Kaggle/czii-cryo-et-object-identification/test/static/ExperimentRuns/TS_5_4/VoxelSpacing10.000/denoised.zarr>},
 'TS_69_2': {'denoised': <Group file://I:/Kaggle/czii-cryo-et-object-identification/test/static/ExperimentRuns/TS_69_2/VoxelSpacing10.000/denoised.zarr>},
 'TS_6_4': {'denoised': <Group file://I:/Kaggle/czii-cryo-et-object-identification/test/static/ExperimentRuns/TS_6_4/VoxelSpacing10.000/denoised.zarr>}}

In [11]:
train_label_experiment_folders_path = path + 'train/overlay/ExperimentRuns/'
train_label_experiment_folders_path

'I:/Kaggle/czii-cryo-et-object-identification/train/overlay/ExperimentRuns/'

In [12]:
train_label_experiments = os.listdir(train_label_experiment_folders_path)
train_label_experiments

['TS_5_4', 'TS_69_2', 'TS_6_4', 'TS_6_6', 'TS_73_6', 'TS_86_3', 'TS_99_9']

In [13]:
labels_dict = {}
for experiment in tqdm(train_label_experiments):
    particle_types_dict = {}
    
    with open(f'{train_label_experiment_folders_path}{experiment}/Picks/apo-ferritin.json') as f:
        loaded_json = json.loads(f.read())
    particle_types_dict['apo-ferritin'] = loaded_json

    '''
    with open(f'{train_label_experiment_folders_path}{experiment}/Picks/beta-amylase.json') as f:
        loaded_json = json.loads(f.read())
    particle_types_dict['beta-amylase'] = loaded_json
    '''

    with open(f'{train_label_experiment_folders_path}{experiment}/Picks/beta-galactosidase.json') as f:
        loaded_json = json.loads(f.read())
    particle_types_dict['beta-galactosidase'] = loaded_json

    with open(f'{train_label_experiment_folders_path}{experiment}/Picks/ribosome.json') as f:
        loaded_json = json.loads(f.read())
    particle_types_dict['ribosome'] = loaded_json

    with open(f'{train_label_experiment_folders_path}{experiment}/Picks/thyroglobulin.json') as f:
        loaded_json = json.loads(f.read())
    particle_types_dict['thyroglobulin'] = loaded_json

    with open(f'{train_label_experiment_folders_path}{experiment}/Picks/virus-like-particle.json') as f:
        loaded_json = json.loads(f.read())
    particle_types_dict['virus-like-particle'] = loaded_json

    labels_dict[experiment] = particle_types_dict

100%|███████████████████████████████████████████████████████████████████████████████████| 7/7 [00:00<00:00, 693.58it/s]


In [14]:
labels_dict

{'TS_5_4': {'apo-ferritin': {'pickable_object_name': 'apo-ferritin',
   'user_id': 'curation',
   'session_id': '0',
   'run_name': 'TS_5_4',
   'voxel_spacing': None,
   'unit': 'angstrom',
   'points': [{'location': {'x': 468.514, 'y': 5915.906, 'z': 604.167},
     'transformation_': [[1.0, 0.0, 0.0, 0.0],
      [0.0, 1.0, 0.0, 0.0],
      [0.0, 0.0, 1.0, 0.0],
      [0.0, 0.0, 0.0, 1.0]],
     'instance_id': 0},
    {'location': {'x': 5674.694, 'y': 1114.354, 'z': 565.068},
     'transformation_': [[1.0, 0.0, 0.0, 0.0],
      [0.0, 1.0, 0.0, 0.0],
      [0.0, 0.0, 1.0, 0.0],
      [0.0, 0.0, 0.0, 1.0]],
     'instance_id': 0},
    {'location': {'x': 5744.509, 'y': 1049.172, 'z': 653.712},
     'transformation_': [[1.0, 0.0, 0.0, 0.0],
      [0.0, 1.0, 0.0, 0.0],
      [0.0, 0.0, 1.0, 0.0],
      [0.0, 0.0, 0.0, 1.0]],
     'instance_id': 0},
    {'location': {'x': 5880.769, 'y': 1125.348, 'z': 579.56},
     'transformation_': [[1.0, 0.0, 0.0, 0.0],
      [0.0, 1.0, 0.0, 0.0],
      

In [15]:
experiment_list = []
particle_type_list = []
x_list = []
y_list = []
z_list = []
for experiment in tqdm(test_data_experiments):
    #print(experiment)
    #print(len(labels_dict[experiment]['apo-ferritin']['points']))
    #print(type(labels_dict[experiment]['apo-ferritin']['points']))
    #print(labels_dict[experiment]['apo-ferritin']['points'][0])

    for key in labels_dict[experiment].keys():
        #print(labels_dict[experiment][key])
        #print(labels_dict[experiment][key]['pickable_object_name'])
        for i in range(len(labels_dict[experiment][key]['points'])):
            experiment_list.append(labels_dict[experiment][key]['run_name'])
            particle_type_list.append(labels_dict[experiment][key]['pickable_object_name'])
            x_list.append(labels_dict[experiment][key]['points'][i]['location']['x'])
            y_list.append(labels_dict[experiment][key]['points'][i]['location']['y'])
            z_list.append(labels_dict[experiment][key]['points'][i]['location']['z'])

100%|████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<?, ?it/s]


In [16]:
print(len(experiment_list))
print(len(particle_type_list))

445
445


In [17]:
labels_df = pd.DataFrame({'experiment':experiment_list, 'particle_type':particle_type_list, 'x':x_list, 'y':y_list, 'z':z_list})
print(labels_df.shape)
labels_df

(445, 5)


Unnamed: 0,experiment,particle_type,x,y,z
0,TS_5_4,apo-ferritin,468.514,5915.906,604.167
1,TS_5_4,apo-ferritin,5674.694,1114.354,565.068
2,TS_5_4,apo-ferritin,5744.509,1049.172,653.712
3,TS_5_4,apo-ferritin,5880.769,1125.348,579.560
4,TS_5_4,apo-ferritin,4661.667,1269.497,810.409
...,...,...,...,...,...
440,TS_6_4,virus-like-particle,5088.704,4120.923,981.513
441,TS_6_4,virus-like-particle,4268.076,2814.277,815.446
442,TS_6_4,virus-like-particle,5211.319,5766.513,877.832
443,TS_6_4,virus-like-particle,4509.570,5139.077,1161.950


In [19]:
particle_radius = {
    'apo-ferritin': 60 /2,
    'beta-amylase': 65 /2,
    'beta-galactosidase': 90 /2,
    'ribosome': 150 /2,
    'thyroglobulin': 130 /2,
    'virus-like-particle': 135 /2,
}

In [20]:
apos = labels_dict['TS_5_4']['apo-ferritin']['points']
apo_coordinates_list = []
for i in range(len(apos)):
    apo_coordinates_list.append([apos[i]['location']['z']/10, apos[i]['location']['y']/10, apos[i]['location']['x']/10])
apo_coordinates_array = np.array(apo_coordinates_list)

betas = labels_dict['TS_5_4']['beta-galactosidase']['points']
beta_coordinates_list = []
for i in range(len(betas)):
    beta_coordinates_list.append([betas[i]['location']['z']/10, betas[i]['location']['y']/10, betas[i]['location']['x']/10])
beta_coordinates_array = np.array(beta_coordinates_list)

ribos = labels_dict['TS_5_4']['ribosome']['points']
ribo_coordinates_list = []
for i in range(len(ribos)):
    ribo_coordinates_list.append([ribos[i]['location']['z']/10, ribos[i]['location']['y']/10, ribos[i]['location']['x']/10])
ribo_coordinates_array = np.array(ribo_coordinates_list)

thyros = labels_dict['TS_5_4']['thyroglobulin']['points']
thyro_coordinates_list = []
for i in range(len(thyros)):
    thyro_coordinates_list.append([thyros[i]['location']['z']/10, thyros[i]['location']['y']/10, thyros[i]['location']['x']/10])
thyro_coordinates_array = np.array(thyro_coordinates_list)

virus = labels_dict['TS_5_4']['virus-like-particle']['points']
virus_coordinates_list = []
for i in range(len(virus)):
    virus_coordinates_list.append([virus[i]['location']['z']/10, virus[i]['location']['y']/10, virus[i]['location']['x']/10])
virus_coordinates_array = np.array(virus_coordinates_list)

In [25]:
image = data_dict['TS_5_4']['denoised']['0']

In [26]:
# Open Napari viewer
viewer = napari.Viewer()
viewer.add_image(image, colormap="gray", name="CryoET Image")

# Add points layer for enzyme locations
viewer.add_points(
    apo_coordinates_array, 
    size=particle_radius['apo-ferritin'], 
    face_color='transparent', 
    border_color='green',
    opacity=1,
    name="apo-ferritin Locations",
    out_of_slice_display=True
)

viewer.add_points(
    beta_coordinates_array, 
    size=particle_radius['beta-galactosidase'], 
    face_color='transparent', 
    border_color='blue',
    opacity=1,
    name="beta-galactosidase Locations",
    out_of_slice_display=True
)

viewer.add_points(
    ribo_coordinates_array, 
    size=particle_radius['ribosome'], 
    face_color='transparent', 
    border_color='brown',
    opacity=1,
    name="ribosome Locations",
    out_of_slice_display=True
)

viewer.add_points(
    thyro_coordinates_array, 
    size=particle_radius['thyroglobulin'], 
    face_color='transparent', 
    border_color='yellow',
    opacity=1,
    name="thyroglobulin Locations",
    out_of_slice_display=True
)

viewer.add_points(
    virus_coordinates_array, 
    size=particle_radius['virus-like-particle'], 
    face_color='transparent', 
    border_color='red',
    opacity=1,
    name="virus-like-particle Locations",
    out_of_slice_display=True
)

<Points layer 'virus-like-particle Locations' at 0x1fd2537f390>

# References

1. https://www.kaggle.com/code/davidlist/experiment-ts-6-4-visualization
2. https://www.kaggle.com/code/nk35jk/3d-visualization-of-particles